| /* |
| * Copyright 2022 Advanced Micro Devices, Inc. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included in |
| * all copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
| * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
| * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
| * OTHER DEALINGS IN THE SOFTWARE. |
| * |
| * Authors: AMD |
| * |
| */ |
| #include "display_mode_vba_util_32.h" |
| #include "../dml_inline_defs.h" |
| #include "display_mode_vba_32.h" |
| #include "../display_mode_lib.h" |
| |
| #define DCN32_MAX_FMT_420_BUFFER_WIDTH 4096 |
| |
| unsigned int dml32_dscceComputeDelay( |
| unsigned int bpc, |
| double BPP, |
| unsigned int sliceWidth, |
| unsigned int numSlices, |
| enum output_format_class pixelFormat, |
| enum output_encoder_class Output) |
| { |
| // valid bpc = source bits per component in the set of {8, 10, 12} |
| // valid bpp = increments of 1/16 of a bit |
| // min = 6/7/8 in N420/N422/444, respectively |
| // max = such that compression is 1:1 |
| //valid sliceWidth = number of pixels per slice line, |
| // must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) |
| //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} |
| //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} |
| |
| // fixed value |
| unsigned int rcModelSize = 8192; |
| |
| // N422/N420 operate at 2 pixels per clock |
| unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L, |
| Delay, pixels; |
| |
| if (pixelFormat == dm_420) |
| pixelsPerClock = 2; |
| else if (pixelFormat == dm_n422) |
| pixelsPerClock = 2; |
| // #all other modes operate at 1 pixel per clock |
| else |
| pixelsPerClock = 1; |
| |
| //initial transmit delay as per PPS |
| initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock); |
| |
| //compute ssm delay |
| if (bpc == 8) |
| D = 81; |
| else if (bpc == 10) |
| D = 89; |
| else |
| D = 113; |
| |
| //divide by pixel per cycle to compute slice width as seen by DSC |
| w = sliceWidth / pixelsPerClock; |
| |
| //422 mode has an additional cycle of delay |
| if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422) |
| s = 0; |
| else |
| s = 1; |
| |
| //main calculation for the dscce |
| ix = initalXmitDelay + 45; |
| wx = (w + 2) / 3; |
| p = 3 * wx - w; |
| l0 = ix / w; |
| a = ix + p * l0; |
| ax = (a + 2) / 3 + D + 6 + 1; |
| L = (ax + wx - 1) / wx; |
| if ((ix % w) == 0 && p != 0) |
| lstall = 1; |
| else |
| lstall = 0; |
| Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22; |
| |
| //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels |
| pixels = Delay * 3 * pixelsPerClock; |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: bpc: %d\n", __func__, bpc); |
| dml_print("DML::%s: BPP: %f\n", __func__, BPP); |
| dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth); |
| dml_print("DML::%s: numSlices: %d\n", __func__, numSlices); |
| dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat); |
| dml_print("DML::%s: Output: %d\n", __func__, Output); |
| dml_print("DML::%s: pixels: %d\n", __func__, pixels); |
| #endif |
| |
| return pixels; |
| } |
| |
| unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output) |
| { |
| unsigned int Delay = 0; |
| |
| if (pixelFormat == dm_420) { |
| // sfr |
| Delay = Delay + 2; |
| // dsccif |
| Delay = Delay + 0; |
| // dscc - input deserializer |
| Delay = Delay + 3; |
| // dscc gets pixels every other cycle |
| Delay = Delay + 2; |
| // dscc - input cdc fifo |
| Delay = Delay + 12; |
| // dscc gets pixels every other cycle |
| Delay = Delay + 13; |
| // dscc - cdc uncertainty |
| Delay = Delay + 2; |
| // dscc - output cdc fifo |
| Delay = Delay + 7; |
| // dscc gets pixels every other cycle |
| Delay = Delay + 3; |
| // dscc - cdc uncertainty |
| Delay = Delay + 2; |
| // dscc - output serializer |
| Delay = Delay + 1; |
| // sft |
| Delay = Delay + 1; |
| } else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) { |
| // sfr |
| Delay = Delay + 2; |
| // dsccif |
| Delay = Delay + 1; |
| // dscc - input deserializer |
| Delay = Delay + 5; |
| // dscc - input cdc fifo |
| Delay = Delay + 25; |
| // dscc - cdc uncertainty |
| Delay = Delay + 2; |
| // dscc - output cdc fifo |
| Delay = Delay + 10; |
| // dscc - cdc uncertainty |
| Delay = Delay + 2; |
| // dscc - output serializer |
| Delay = Delay + 1; |
| // sft |
| Delay = Delay + 1; |
| } else { |
| // sfr |
| Delay = Delay + 2; |
| // dsccif |
| Delay = Delay + 0; |
| // dscc - input deserializer |
| Delay = Delay + 3; |
| // dscc - input cdc fifo |
| Delay = Delay + 12; |
| // dscc - cdc uncertainty |
| Delay = Delay + 2; |
| // dscc - output cdc fifo |
| Delay = Delay + 7; |
| // dscc - output serializer |
| Delay = Delay + 1; |
| // dscc - cdc uncertainty |
| Delay = Delay + 2; |
| // sft |
| Delay = Delay + 1; |
| } |
| |
| return Delay; |
| } |
| |
| |
| bool IsVertical(enum dm_rotation_angle Scan) |
| { |
| bool is_vert = false; |
| |
| if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m) |
| is_vert = true; |
| else |
| is_vert = false; |
| return is_vert; |
| } |
| |
| void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput( |
| double HRatio, |
| double HRatioChroma, |
| double VRatio, |
| double VRatioChroma, |
| double MaxDCHUBToPSCLThroughput, |
| double MaxPSCLToLBThroughput, |
| double PixelClock, |
| enum source_format_class SourcePixelFormat, |
| unsigned int HTaps, |
| unsigned int HTapsChroma, |
| unsigned int VTaps, |
| unsigned int VTapsChroma, |
| |
| /* output */ |
| double *PSCL_THROUGHPUT, |
| double *PSCL_THROUGHPUT_CHROMA, |
| double *DPPCLKUsingSingleDPP) |
| { |
| double DPPCLKUsingSingleDPPLuma; |
| double DPPCLKUsingSingleDPPChroma; |
| |
| if (HRatio > 1) { |
| *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / |
| dml_ceil((double) HTaps / 6.0, 1.0)); |
| } else { |
| *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); |
| } |
| |
| DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio / |
| *PSCL_THROUGHPUT, 1); |
| |
| if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock) |
| DPPCLKUsingSingleDPPLuma = 2 * PixelClock; |
| |
| if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 && |
| SourcePixelFormat != dm_rgbe_alpha)) { |
| *PSCL_THROUGHPUT_CHROMA = 0; |
| *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma; |
| } else { |
| if (HRatioChroma > 1) { |
| *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * |
| HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0)); |
| } else { |
| *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); |
| } |
| DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma), |
| HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1); |
| if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock) |
| DPPCLKUsingSingleDPPChroma = 2 * PixelClock; |
| *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma); |
| } |
| } |
| |
| void dml32_CalculateBytePerPixelAndBlockSizes( |
| enum source_format_class SourcePixelFormat, |
| enum dm_swizzle_mode SurfaceTiling, |
| |
| /* Output */ |
| unsigned int *BytePerPixelY, |
| unsigned int *BytePerPixelC, |
| double *BytePerPixelDETY, |
| double *BytePerPixelDETC, |
| unsigned int *BlockHeight256BytesY, |
| unsigned int *BlockHeight256BytesC, |
| unsigned int *BlockWidth256BytesY, |
| unsigned int *BlockWidth256BytesC, |
| unsigned int *MacroTileHeightY, |
| unsigned int *MacroTileHeightC, |
| unsigned int *MacroTileWidthY, |
| unsigned int *MacroTileWidthC) |
| { |
| if (SourcePixelFormat == dm_444_64) { |
| *BytePerPixelDETY = 8; |
| *BytePerPixelDETC = 0; |
| *BytePerPixelY = 8; |
| *BytePerPixelC = 0; |
| } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) { |
| *BytePerPixelDETY = 4; |
| *BytePerPixelDETC = 0; |
| *BytePerPixelY = 4; |
| *BytePerPixelC = 0; |
| } else if (SourcePixelFormat == dm_444_16) { |
| *BytePerPixelDETY = 2; |
| *BytePerPixelDETC = 0; |
| *BytePerPixelY = 2; |
| *BytePerPixelC = 0; |
| } else if (SourcePixelFormat == dm_444_8) { |
| *BytePerPixelDETY = 1; |
| *BytePerPixelDETC = 0; |
| *BytePerPixelY = 1; |
| *BytePerPixelC = 0; |
| } else if (SourcePixelFormat == dm_rgbe_alpha) { |
| *BytePerPixelDETY = 4; |
| *BytePerPixelDETC = 1; |
| *BytePerPixelY = 4; |
| *BytePerPixelC = 1; |
| } else if (SourcePixelFormat == dm_420_8) { |
| *BytePerPixelDETY = 1; |
| *BytePerPixelDETC = 2; |
| *BytePerPixelY = 1; |
| *BytePerPixelC = 2; |
| } else if (SourcePixelFormat == dm_420_12) { |
| *BytePerPixelDETY = 2; |
| *BytePerPixelDETC = 4; |
| *BytePerPixelY = 2; |
| *BytePerPixelC = 4; |
| } else { |
| *BytePerPixelDETY = 4.0 / 3; |
| *BytePerPixelDETC = 8.0 / 3; |
| *BytePerPixelY = 2; |
| *BytePerPixelC = 4; |
| } |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat); |
| dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY); |
| dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC); |
| dml_print("DML::%s: BytePerPixelY = %d\n", __func__, *BytePerPixelY); |
| dml_print("DML::%s: BytePerPixelC = %d\n", __func__, *BytePerPixelC); |
| #endif |
| if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 |
| || SourcePixelFormat == dm_444_16 |
| || SourcePixelFormat == dm_444_8 |
| || SourcePixelFormat == dm_mono_16 |
| || SourcePixelFormat == dm_mono_8 |
| || SourcePixelFormat == dm_rgbe)) { |
| if (SurfaceTiling == dm_sw_linear) |
| *BlockHeight256BytesY = 1; |
| else if (SourcePixelFormat == dm_444_64) |
| *BlockHeight256BytesY = 4; |
| else if (SourcePixelFormat == dm_444_8) |
| *BlockHeight256BytesY = 16; |
| else |
| *BlockHeight256BytesY = 8; |
| |
| *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; |
| *BlockHeight256BytesC = 0; |
| *BlockWidth256BytesC = 0; |
| } else { |
| if (SurfaceTiling == dm_sw_linear) { |
| *BlockHeight256BytesY = 1; |
| *BlockHeight256BytesC = 1; |
| } else if (SourcePixelFormat == dm_rgbe_alpha) { |
| *BlockHeight256BytesY = 8; |
| *BlockHeight256BytesC = 16; |
| } else if (SourcePixelFormat == dm_420_8) { |
| *BlockHeight256BytesY = 16; |
| *BlockHeight256BytesC = 8; |
| } else { |
| *BlockHeight256BytesY = 8; |
| *BlockHeight256BytesC = 8; |
| } |
| *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; |
| *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC; |
| } |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: BlockWidth256BytesY = %d\n", __func__, *BlockWidth256BytesY); |
| dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY); |
| dml_print("DML::%s: BlockWidth256BytesC = %d\n", __func__, *BlockWidth256BytesC); |
| dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC); |
| #endif |
| |
| if (SurfaceTiling == dm_sw_linear) { |
| *MacroTileHeightY = *BlockHeight256BytesY; |
| *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY; |
| *MacroTileHeightC = *BlockHeight256BytesC; |
| if (*MacroTileHeightC == 0) |
| *MacroTileWidthC = 0; |
| else |
| *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC; |
| } else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t || |
| SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) { |
| *MacroTileHeightY = 16 * *BlockHeight256BytesY; |
| *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY; |
| *MacroTileHeightC = 16 * *BlockHeight256BytesC; |
| if (*MacroTileHeightC == 0) |
| *MacroTileWidthC = 0; |
| else |
| *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC; |
| } else { |
| *MacroTileHeightY = 32 * *BlockHeight256BytesY; |
| *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY; |
| *MacroTileHeightC = 32 * *BlockHeight256BytesC; |
| if (*MacroTileHeightC == 0) |
| *MacroTileWidthC = 0; |
| else |
| *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC; |
| } |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: MacroTileWidthY = %d\n", __func__, *MacroTileWidthY); |
| dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY); |
| dml_print("DML::%s: MacroTileWidthC = %d\n", __func__, *MacroTileWidthC); |
| dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC); |
| #endif |
| } // CalculateBytePerPixelAndBlockSizes |
| |
| void dml32_CalculateSwathAndDETConfiguration( |
| unsigned int DETSizeOverride[], |
| enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], |
| unsigned int ConfigReturnBufferSizeInKByte, |
| unsigned int MaxTotalDETInKByte, |
| unsigned int MinCompressedBufferSizeInKByte, |
| double ForceSingleDPP, |
| unsigned int NumberOfActiveSurfaces, |
| unsigned int nomDETInKByte, |
| enum unbounded_requesting_policy UseUnboundedRequestingFinal, |
| bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, |
| unsigned int PixelChunkSizeKBytes, |
| unsigned int ROBSizeKBytes, |
| unsigned int CompressedBufferSegmentSizeInkByteFinal, |
| enum output_encoder_class Output[], |
| double ReadBandwidthLuma[], |
| double ReadBandwidthChroma[], |
| double MaximumSwathWidthLuma[], |
| double MaximumSwathWidthChroma[], |
| enum dm_rotation_angle SourceRotation[], |
| bool ViewportStationary[], |
| enum source_format_class SourcePixelFormat[], |
| enum dm_swizzle_mode SurfaceTiling[], |
| unsigned int ViewportWidth[], |
| unsigned int ViewportHeight[], |
| unsigned int ViewportXStart[], |
| unsigned int ViewportYStart[], |
| unsigned int ViewportXStartC[], |
| unsigned int ViewportYStartC[], |
| unsigned int SurfaceWidthY[], |
| unsigned int SurfaceWidthC[], |
| unsigned int SurfaceHeightY[], |
| unsigned int SurfaceHeightC[], |
| unsigned int Read256BytesBlockHeightY[], |
| unsigned int Read256BytesBlockHeightC[], |
| unsigned int Read256BytesBlockWidthY[], |
| unsigned int Read256BytesBlockWidthC[], |
| enum odm_combine_mode ODMMode[], |
| unsigned int BlendingAndTiming[], |
| unsigned int BytePerPixY[], |
| unsigned int BytePerPixC[], |
| double BytePerPixDETY[], |
| double BytePerPixDETC[], |
| unsigned int HActive[], |
| double HRatio[], |
| double HRatioChroma[], |
| unsigned int DPPPerSurface[], |
| |
| /* Output */ |
| unsigned int swath_width_luma_ub[], |
| unsigned int swath_width_chroma_ub[], |
| double SwathWidth[], |
| double SwathWidthChroma[], |
| unsigned int SwathHeightY[], |
| unsigned int SwathHeightC[], |
| unsigned int DETBufferSizeInKByte[], |
| unsigned int DETBufferSizeY[], |
| unsigned int DETBufferSizeC[], |
| bool *UnboundedRequestEnabled, |
| unsigned int *CompressedBufferSizeInkByte, |
| unsigned int *CompBufReservedSpaceKBytes, |
| bool *CompBufReservedSpaceNeedAdjustment, |
| bool ViewportSizeSupportPerSurface[], |
| bool *ViewportSizeSupport) |
| { |
| unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX]; |
| unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX]; |
| unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX]; |
| unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX]; |
| unsigned int RoundedUpSwathSizeBytesY; |
| unsigned int RoundedUpSwathSizeBytesC; |
| double SwathWidthdoubleDPP[DC__NUM_DPP__MAX]; |
| double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX]; |
| unsigned int k; |
| unsigned int TotalActiveDPP = 0; |
| bool NoChromaSurfaces = true; |
| unsigned int DETBufferSizeInKByteForSwathCalculation; |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); |
| dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes); |
| dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes); |
| #endif |
| dml32_CalculateSwathWidth(ForceSingleDPP, |
| NumberOfActiveSurfaces, |
| SourcePixelFormat, |
| SourceRotation, |
| ViewportStationary, |
| ViewportWidth, |
| ViewportHeight, |
| ViewportXStart, |
| ViewportYStart, |
| ViewportXStartC, |
| ViewportYStartC, |
| SurfaceWidthY, |
| SurfaceWidthC, |
| SurfaceHeightY, |
| SurfaceHeightC, |
| ODMMode, |
| BytePerPixY, |
| BytePerPixC, |
| Read256BytesBlockHeightY, |
| Read256BytesBlockHeightC, |
| Read256BytesBlockWidthY, |
| Read256BytesBlockWidthC, |
| BlendingAndTiming, |
| HActive, |
| HRatio, |
| DPPPerSurface, |
| |
| /* Output */ |
| SwathWidthdoubleDPP, |
| SwathWidthdoubleDPPChroma, |
| SwathWidth, |
| SwathWidthChroma, |
| MaximumSwathHeightY, |
| MaximumSwathHeightC, |
| swath_width_luma_ub, |
| swath_width_chroma_ub); |
| |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k]; |
| RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k]; |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); |
| dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]); |
| dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]); |
| dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]); |
| dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k, |
| RoundedUpMaxSwathSizeBytesY[k]); |
| dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]); |
| dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]); |
| dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]); |
| dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k, |
| RoundedUpMaxSwathSizeBytesC[k]); |
| #endif |
| |
| if (SourcePixelFormat[k] == dm_420_10) { |
| RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256); |
| RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256); |
| } |
| } |
| |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]); |
| if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || |
| SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) { |
| NoChromaSurfaces = false; |
| } |
| } |
| |
| // By default, just set the reserved space to 2 pixel chunks size |
| *CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2; |
| |
| // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data |
| // - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio] |
| // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req |
| *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512); |
| |
| if (*CompBufReservedSpaceNeedAdjustment == 1) { |
| *CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512; |
| } |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: CompBufReservedSpaceKBytes = %d\n", __func__, *CompBufReservedSpaceKBytes); |
| dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment); |
| #endif |
| |
| *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); |
| |
| dml32_CalculateDETBufferSize(DETSizeOverride, |
| UseMALLForPStateChange, |
| ForceSingleDPP, |
| NumberOfActiveSurfaces, |
| *UnboundedRequestEnabled, |
| nomDETInKByte, |
| MaxTotalDETInKByte, |
| ConfigReturnBufferSizeInKByte, |
| MinCompressedBufferSizeInKByte, |
| CompressedBufferSegmentSizeInkByteFinal, |
| SourcePixelFormat, |
| ReadBandwidthLuma, |
| ReadBandwidthChroma, |
| RoundedUpMaxSwathSizeBytesY, |
| RoundedUpMaxSwathSizeBytesC, |
| DPPPerSurface, |
| |
| /* Output */ |
| DETBufferSizeInKByte, // per hubp pipe |
| CompressedBufferSizeInkByte); |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP); |
| dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); |
| dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); |
| dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal); |
| dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled); |
| dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); |
| #endif |
| |
| *ViewportSizeSupport = true; |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| |
| DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] == |
| dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]); |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k, |
| DETBufferSizeInKByteForSwathCalculation); |
| #endif |
| |
| if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <= |
| DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { |
| SwathHeightY[k] = MaximumSwathHeightY[k]; |
| SwathHeightC[k] = MaximumSwathHeightC[k]; |
| RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k]; |
| RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k]; |
| } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] && |
| RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <= |
| DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { |
| SwathHeightY[k] = MaximumSwathHeightY[k] / 2; |
| SwathHeightC[k] = MaximumSwathHeightC[k]; |
| RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2; |
| RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k]; |
| } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] && |
| RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <= |
| DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { |
| SwathHeightY[k] = MaximumSwathHeightY[k]; |
| SwathHeightC[k] = MaximumSwathHeightC[k] / 2; |
| RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k]; |
| RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2; |
| } else { |
| SwathHeightY[k] = MaximumSwathHeightY[k] / 2; |
| SwathHeightC[k] = MaximumSwathHeightC[k] / 2; |
| RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2; |
| RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2; |
| } |
| |
| if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 > |
| DETBufferSizeInKByteForSwathCalculation * 1024 / 2) |
| || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 && |
| SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) { |
| *ViewportSizeSupport = false; |
| ViewportSizeSupportPerSurface[k] = false; |
| } else { |
| ViewportSizeSupportPerSurface[k] = true; |
| } |
| |
| if (SwathHeightC[k] == 0) { |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k); |
| #endif |
| DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024; |
| DETBufferSizeC[k] = 0; |
| } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) { |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k); |
| #endif |
| DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2; |
| DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2; |
| } else { |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k); |
| #endif |
| DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024); |
| DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k]; |
| } |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]); |
| dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]); |
| dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, |
| k, RoundedUpMaxSwathSizeBytesY[k]); |
| dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, |
| k, RoundedUpMaxSwathSizeBytesC[k]); |
| dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY); |
| dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC); |
| dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); |
| dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); |
| dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]); |
| dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k, |
| ViewportSizeSupportPerSurface[k]); |
| #endif |
| |
| } |
| } // CalculateSwathAndDETConfiguration |
| |
| void dml32_CalculateSwathWidth( |
| bool ForceSingleDPP, |
| unsigned int NumberOfActiveSurfaces, |
| enum source_format_class SourcePixelFormat[], |
| enum dm_rotation_angle SourceRotation[], |
| bool ViewportStationary[], |
| unsigned int ViewportWidth[], |
| unsigned int ViewportHeight[], |
| unsigned int ViewportXStart[], |
| unsigned int ViewportYStart[], |
| unsigned int ViewportXStartC[], |
| unsigned int ViewportYStartC[], |
| unsigned int SurfaceWidthY[], |
| unsigned int SurfaceWidthC[], |
| unsigned int SurfaceHeightY[], |
| unsigned int SurfaceHeightC[], |
| enum odm_combine_mode ODMMode[], |
| unsigned int BytePerPixY[], |
| unsigned int BytePerPixC[], |
| unsigned int Read256BytesBlockHeightY[], |
| unsigned int Read256BytesBlockHeightC[], |
| unsigned int Read256BytesBlockWidthY[], |
| unsigned int Read256BytesBlockWidthC[], |
| unsigned int BlendingAndTiming[], |
| unsigned int HActive[], |
| double HRatio[], |
| unsigned int DPPPerSurface[], |
| |
| /* Output */ |
| double SwathWidthdoubleDPPY[], |
| double SwathWidthdoubleDPPC[], |
| double SwathWidthY[], // per-pipe |
| double SwathWidthC[], // per-pipe |
| unsigned int MaximumSwathHeightY[], |
| unsigned int MaximumSwathHeightC[], |
| unsigned int swath_width_luma_ub[], // per-pipe |
| unsigned int swath_width_chroma_ub[]) // per-pipe |
| { |
| unsigned int k, j; |
| enum odm_combine_mode MainSurfaceODMMode; |
| |
| unsigned int surface_width_ub_l; |
| unsigned int surface_height_ub_l; |
| unsigned int surface_width_ub_c = 0; |
| unsigned int surface_height_ub_c = 0; |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); |
| dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces); |
| #endif |
| |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| if (!IsVertical(SourceRotation[k])) |
| SwathWidthdoubleDPPY[k] = ViewportWidth[k]; |
| else |
| SwathWidthdoubleDPPY[k] = ViewportHeight[k]; |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]); |
| dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]); |
| #endif |
| |
| MainSurfaceODMMode = ODMMode[k]; |
| for (j = 0; j < NumberOfActiveSurfaces; ++j) { |
| if (BlendingAndTiming[k] == j) |
| MainSurfaceODMMode = ODMMode[j]; |
| } |
| |
| if (ForceSingleDPP) { |
| SwathWidthY[k] = SwathWidthdoubleDPPY[k]; |
| } else { |
| if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) { |
| SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k], |
| dml_round(HActive[k] / 4.0 * HRatio[k])); |
| } else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) { |
| SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k], |
| dml_round(HActive[k] / 2.0 * HRatio[k])); |
| } else if (DPPPerSurface[k] == 2) { |
| SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2; |
| } else { |
| SwathWidthY[k] = SwathWidthdoubleDPPY[k]; |
| } |
| } |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]); |
| dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]); |
| dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode); |
| dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]); |
| dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]); |
| #endif |
| |
| if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || |
| SourcePixelFormat[k] == dm_420_12) { |
| SwathWidthC[k] = SwathWidthY[k] / 2; |
| SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2; |
| } else { |
| SwathWidthC[k] = SwathWidthY[k]; |
| SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k]; |
| } |
| |
| if (ForceSingleDPP == true) { |
| SwathWidthY[k] = SwathWidthdoubleDPPY[k]; |
| SwathWidthC[k] = SwathWidthdoubleDPPC[k]; |
| } |
| |
| surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]); |
| surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]); |
| |
| if (!IsVertical(SourceRotation[k])) { |
| MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k]; |
| MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k]; |
| if (ViewportStationary[k] && DPPPerSurface[k] == 1) { |
| swath_width_luma_ub[k] = dml_min(surface_width_ub_l, |
| dml_floor(ViewportXStart[k] + |
| SwathWidthY[k] + |
| Read256BytesBlockWidthY[k] - 1, |
| Read256BytesBlockWidthY[k]) - |
| dml_floor(ViewportXStart[k], |
| Read256BytesBlockWidthY[k])); |
| } else { |
| swath_width_luma_ub[k] = dml_min(surface_width_ub_l, |
| dml_ceil(SwathWidthY[k] - 1, |
| Read256BytesBlockWidthY[k]) + |
| Read256BytesBlockWidthY[k]); |
| } |
| if (BytePerPixC[k] > 0) { |
| surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]); |
| if (ViewportStationary[k] && DPPPerSurface[k] == 1) { |
| swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, |
| dml_floor(ViewportXStartC[k] + SwathWidthC[k] + |
| Read256BytesBlockWidthC[k] - 1, |
| Read256BytesBlockWidthC[k]) - |
| dml_floor(ViewportXStartC[k], |
| Read256BytesBlockWidthC[k])); |
| } else { |
| swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, |
| dml_ceil(SwathWidthC[k] - 1, |
| Read256BytesBlockWidthC[k]) + |
| Read256BytesBlockWidthC[k]); |
| } |
| } else { |
| swath_width_chroma_ub[k] = 0; |
| } |
| } else { |
| MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k]; |
| MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k]; |
| |
| if (ViewportStationary[k] && DPPPerSurface[k] == 1) { |
| swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] + |
| SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, |
| Read256BytesBlockHeightY[k]) - |
| dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k])); |
| } else { |
| swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1, |
| Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]); |
| } |
| if (BytePerPixC[k] > 0) { |
| surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]); |
| if (ViewportStationary[k] && DPPPerSurface[k] == 1) { |
| swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, |
| dml_floor(ViewportYStartC[k] + SwathWidthC[k] + |
| Read256BytesBlockHeightC[k] - 1, |
| Read256BytesBlockHeightC[k]) - |
| dml_floor(ViewportYStartC[k], |
| Read256BytesBlockHeightC[k])); |
| } else { |
| swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, |
| dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + |
| Read256BytesBlockHeightC[k]); |
| } |
| } else { |
| swath_width_chroma_ub[k] = 0; |
| } |
| } |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l); |
| dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l); |
| dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c); |
| dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c); |
| dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]); |
| dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]); |
| dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]); |
| dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]); |
| dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]); |
| dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]); |
| dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]); |
| dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]); |
| dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]); |
| dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]); |
| #endif |
| |
| } |
| } // CalculateSwathWidth |
| |
| bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, |
| unsigned int TotalNumberOfActiveDPP, |
| bool NoChroma, |
| enum output_encoder_class Output, |
| enum dm_swizzle_mode SurfaceTiling, |
| bool CompBufReservedSpaceNeedAdjustment, |
| bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment) |
| { |
| bool ret_val = false; |
| |
| ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && |
| TotalNumberOfActiveDPP == 1 && NoChroma); |
| if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) |
| ret_val = false; |
| |
| if (SurfaceTiling == dm_sw_linear) |
| ret_val = false; |
| |
| if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment) |
| ret_val = false; |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, CompBufReservedSpaceNeedAdjustment); |
| dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment = %d\n", __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); |
| dml_print("DML::%s: ret_val = %d\n", __func__, ret_val); |
| #endif |
| |
| return (ret_val); |
| } |
| |
| void dml32_CalculateDETBufferSize( |
| unsigned int DETSizeOverride[], |
| enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], |
| bool ForceSingleDPP, |
| unsigned int NumberOfActiveSurfaces, |
| bool UnboundedRequestEnabled, |
| unsigned int nomDETInKByte, |
| unsigned int MaxTotalDETInKByte, |
| unsigned int ConfigReturnBufferSizeInKByte, |
| unsigned int MinCompressedBufferSizeInKByte, |
| unsigned int CompressedBufferSegmentSizeInkByteFinal, |
| enum source_format_class SourcePixelFormat[], |
| double ReadBandwidthLuma[], |
| double ReadBandwidthChroma[], |
| unsigned int RoundedUpMaxSwathSizeBytesY[], |
| unsigned int RoundedUpMaxSwathSizeBytesC[], |
| unsigned int DPPPerSurface[], |
| /* Output */ |
| unsigned int DETBufferSizeInKByte[], |
| unsigned int *CompressedBufferSizeInkByte) |
| { |
| unsigned int DETBufferSizePoolInKByte; |
| unsigned int NextDETBufferPieceInKByte; |
| bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX]; |
| bool NextPotentialSurfaceToAssignDETPieceFound; |
| unsigned int NextSurfaceToAssignDETPiece; |
| double TotalBandwidth; |
| double BandwidthOfSurfacesNotAssignedDETPiece; |
| unsigned int max_minDET; |
| unsigned int minDET; |
| unsigned int minDET_pipe; |
| unsigned int j, k; |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); |
| dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); |
| dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces); |
| dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled); |
| dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte); |
| dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); |
| dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte); |
| dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__, |
| CompressedBufferSegmentSizeInkByteFinal); |
| #endif |
| |
| // Note: Will use default det size if that fits 2 swaths |
| if (UnboundedRequestEnabled) { |
| if (DETSizeOverride[0] > 0) { |
| DETBufferSizeInKByte[0] = DETSizeOverride[0]; |
| } else { |
| DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 * |
| ((double) RoundedUpMaxSwathSizeBytesY[0] + |
| (double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0)); |
| } |
| *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0]; |
| } else { |
| DETBufferSizePoolInKByte = MaxTotalDETInKByte; |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| DETBufferSizeInKByte[k] = nomDETInKByte; |
| if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || |
| SourcePixelFormat[k] == dm_420_12) { |
| max_minDET = nomDETInKByte - 64; |
| } else { |
| max_minDET = nomDETInKByte; |
| } |
| minDET = 128; |
| minDET_pipe = 0; |
| |
| // add DET resource until can hold 2 full swaths |
| while (minDET <= max_minDET && minDET_pipe == 0) { |
| if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] + |
| (double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET) |
| minDET_pipe = minDET; |
| minDET = minDET + 64; |
| } |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%0d minDET = %d\n", __func__, k, minDET); |
| dml_print("DML::%s: k=%0d max_minDET = %d\n", __func__, k, max_minDET); |
| dml_print("DML::%s: k=%0d minDET_pipe = %d\n", __func__, k, minDET_pipe); |
| dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k, |
| RoundedUpMaxSwathSizeBytesY[k]); |
| dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k, |
| RoundedUpMaxSwathSizeBytesC[k]); |
| #endif |
| |
| if (minDET_pipe == 0) { |
| minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] + |
| (double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64)); |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n", |
| __func__, k, minDET_pipe); |
| #endif |
| } |
| |
| if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) { |
| DETBufferSizeInKByte[k] = 0; |
| } else if (DETSizeOverride[k] > 0) { |
| DETBufferSizeInKByte[k] = DETSizeOverride[k]; |
| DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - |
| (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k]; |
| } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) { |
| DETBufferSizeInKByte[k] = minDET_pipe; |
| DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - |
| (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe; |
| } |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); |
| dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]); |
| dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); |
| dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte); |
| #endif |
| } |
| |
| TotalBandwidth = 0; |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) |
| TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; |
| } |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__); |
| for (uint k = 0; k < NumberOfActiveSurfaces; ++k) |
| dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); |
| dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__); |
| dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth); |
| #endif |
| BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth; |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| |
| if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) { |
| DETPieceAssignedToThisSurfaceAlready[k] = true; |
| } else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) * |
| (double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >= |
| ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) { |
| DETPieceAssignedToThisSurfaceAlready[k] = true; |
| BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - |
| ReadBandwidthLuma[k] - ReadBandwidthChroma[k]; |
| } else { |
| DETPieceAssignedToThisSurfaceAlready[k] = false; |
| } |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k, |
| DETPieceAssignedToThisSurfaceAlready[k]); |
| dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, |
| BandwidthOfSurfacesNotAssignedDETPiece); |
| #endif |
| } |
| |
| for (j = 0; j < NumberOfActiveSurfaces; ++j) { |
| NextPotentialSurfaceToAssignDETPieceFound = false; |
| NextSurfaceToAssignDETPiece = 0; |
| |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k, |
| ReadBandwidthLuma[k]); |
| dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k, |
| ReadBandwidthChroma[k]); |
| dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, |
| ReadBandwidthLuma[NextSurfaceToAssignDETPiece]); |
| dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, |
| ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); |
| dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k, |
| NextSurfaceToAssignDETPiece); |
| #endif |
| if (!DETPieceAssignedToThisSurfaceAlready[k] && |
| (!NextPotentialSurfaceToAssignDETPieceFound || |
| ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < |
| ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + |
| ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) { |
| NextSurfaceToAssignDETPiece = k; |
| NextPotentialSurfaceToAssignDETPieceFound = true; |
| } |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n", |
| __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]); |
| dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n", |
| __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound); |
| #endif |
| } |
| |
| if (NextPotentialSurfaceToAssignDETPieceFound) { |
| // Note: To show the banker's rounding behavior in VBA and also the fact |
| // that the DET buffer size varies due to precision issue |
| // |
| //double tmp1 = ((double) DETBufferSizePoolInKByte * |
| // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + |
| // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / |
| // BandwidthOfSurfacesNotAssignedDETPiece / |
| // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); |
| //double tmp2 = dml_round((double) DETBufferSizePoolInKByte * |
| // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + |
| // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / |
| //BandwidthOfSurfacesNotAssignedDETPiece / |
| // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); |
| // |
| //dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1); |
| //dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2); |
| |
| NextDETBufferPieceInKByte = dml_min( |
| dml_round((double) DETBufferSizePoolInKByte * |
| (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + |
| ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / |
| BandwidthOfSurfacesNotAssignedDETPiece / |
| ((ForceSingleDPP ? 1 : |
| DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) * |
| (ForceSingleDPP ? 1 : |
| DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0, |
| dml_floor((double) DETBufferSizePoolInKByte, |
| (ForceSingleDPP ? 1 : |
| DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); |
| |
| // Above calculation can assign the entire DET buffer allocation to a single pipe. |
| // We should limit the per-pipe DET size to the nominal / max per pipe. |
| if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) { |
| if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] < |
| nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) { |
| NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) - |
| DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]; |
| } else { |
| // Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] |
| // already has the max per-pipe value |
| NextDETBufferPieceInKByte = 0; |
| } |
| } |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j, |
| DETBufferSizePoolInKByte); |
| dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, |
| NextSurfaceToAssignDETPiece); |
| dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j, |
| NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]); |
| dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j, |
| NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); |
| dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", |
| __func__, j, BandwidthOfSurfacesNotAssignedDETPiece); |
| dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j, |
| NextDETBufferPieceInKByte); |
| dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ", |
| __func__, j, NextSurfaceToAssignDETPiece, |
| DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]); |
| #endif |
| |
| DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] = |
| DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] |
| + NextDETBufferPieceInKByte |
| / (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]); |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]); |
| #endif |
| |
| DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte; |
| DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true; |
| BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - |
| (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + |
| ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); |
| } |
| } |
| *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte; |
| } |
| *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64; |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__); |
| dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); |
| for (uint k = 0; k < NumberOfActiveSurfaces; ++k) { |
| dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n", |
| __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]); |
| } |
| #endif |
| } // CalculateDETBufferSize |
| |
| void dml32_CalculateODMMode( |
| unsigned int MaximumPixelsPerLinePerDSCUnit, |
| unsigned int HActive, |
| enum output_format_class OutFormat, |
| enum output_encoder_class Output, |
| enum odm_combine_policy ODMUse, |
| double StateDispclk, |
| double MaxDispclk, |
| bool DSCEnable, |
| unsigned int TotalNumberOfActiveDPP, |
| unsigned int MaxNumDPP, |
| double PixelClock, |
| double DISPCLKDPPCLKDSCCLKDownSpreading, |
| double DISPCLKRampingMargin, |
| double DISPCLKDPPCLKVCOSpeed, |
| unsigned int NumberOfDSCSlices, |
| |
| /* Output */ |
| bool *TotalAvailablePipesSupport, |
| unsigned int *NumberOfDPP, |
| enum odm_combine_mode *ODMMode, |
| double *RequiredDISPCLKPerSurface) |
| { |
| |
| double SurfaceRequiredDISPCLKWithoutODMCombine; |
| double SurfaceRequiredDISPCLKWithODMCombineTwoToOne; |
| double SurfaceRequiredDISPCLKWithODMCombineFourToOne; |
| |
| SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled, |
| PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, |
| MaxDispclk); |
| SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1, |
| PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, |
| MaxDispclk); |
| SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1, |
| PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, |
| MaxDispclk); |
| *TotalAvailablePipesSupport = true; |
| *ODMMode = dm_odm_combine_mode_disabled; // initialize as disable |
| |
| if (ODMUse == dm_odm_combine_policy_none) |
| *ODMMode = dm_odm_combine_mode_disabled; |
| |
| *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine; |
| *NumberOfDPP = 0; |
| |
| // FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care?? |
| // (ODMUse == "" || ODMUse == "CombineAsNeeded") |
| |
| if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 || |
| ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk || |
| (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit)) |
| || NumberOfDSCSlices > 8)))) { |
| if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) { |
| *ODMMode = dm_odm_combine_mode_4to1; |
| *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne; |
| *NumberOfDPP = 4; |
| } else { |
| *TotalAvailablePipesSupport = false; |
| } |
| } else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 || |
| (((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk && |
| SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) || |
| (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit)) |
| || (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) { |
| if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) { |
| *ODMMode = dm_odm_combine_mode_2to1; |
| *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne; |
| *NumberOfDPP = 2; |
| } else { |
| *TotalAvailablePipesSupport = false; |
| } |
| } else { |
| if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP) |
| *NumberOfDPP = 1; |
| else |
| *TotalAvailablePipesSupport = false; |
| } |
| if (OutFormat == dm_420 && HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH && |
| ODMUse != dm_odm_combine_policy_4to1) { |
| if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 4) { |
| *ODMMode = dm_odm_combine_mode_disabled; |
| *NumberOfDPP = 0; |
| *TotalAvailablePipesSupport = false; |
| } else if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 2 || |
| *ODMMode == dm_odm_combine_mode_4to1) { |
| *ODMMode = dm_odm_combine_mode_4to1; |
| *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne; |
| *NumberOfDPP = 4; |
| } else { |
| *ODMMode = dm_odm_combine_mode_2to1; |
| *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne; |
| *NumberOfDPP = 2; |
| } |
| } |
| if (Output == dm_hdmi && OutFormat == dm_420 && |
| HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH) { |
| *ODMMode = dm_odm_combine_mode_disabled; |
| *NumberOfDPP = 0; |
| *TotalAvailablePipesSupport = false; |
| } |
| } |
| |
| double dml32_CalculateRequiredDispclk( |
| enum odm_combine_mode ODMMode, |
| double PixelClock, |
| double DISPCLKDPPCLKDSCCLKDownSpreading, |
| double DISPCLKRampingMargin, |
| double DISPCLKDPPCLKVCOSpeed, |
| double MaxDispclk) |
| { |
| double RequiredDispclk = 0.; |
| double PixelClockAfterODM; |
| double DISPCLKWithRampingRoundedToDFSGranularity; |
| double DISPCLKWithoutRampingRoundedToDFSGranularity; |
| double MaxDispclkRoundedDownToDFSGranularity; |
| |
| if (ODMMode == dm_odm_combine_mode_4to1) |
| PixelClockAfterODM = PixelClock / 4; |
| else if (ODMMode == dm_odm_combine_mode_2to1) |
| PixelClockAfterODM = PixelClock / 2; |
| else |
| PixelClockAfterODM = PixelClock; |
| |
| |
| DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity( |
| PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100) |
| * (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed); |
| |
| DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity( |
| PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed); |
| |
| MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed); |
| |
| if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) |
| RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity; |
| else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) |
| RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity; |
| else |
| RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity; |
| |
| return RequiredDispclk; |
| } |
| |
| double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed) |
| { |
| if (Clock <= 0.0) |
| return 0.0; |
| |
| if (round_up) |
| return VCOSpeed * 4.0 / dml_floor(VCOSpeed * 4.0 / Clock, 1.0); |
| else |
| return VCOSpeed * 4.0 / dml_ceil(VCOSpeed * 4.0 / Clock, 1.0); |
| } |
| |
| void dml32_CalculateOutputLink( |
| double PHYCLKPerState, |
| double PHYCLKD18PerState, |
| double PHYCLKD32PerState, |
| double Downspreading, |
| bool IsMainSurfaceUsingTheIndicatedTiming, |
| enum output_encoder_class Output, |
| enum output_format_class OutputFormat, |
| unsigned int HTotal, |
| unsigned int HActive, |
| double PixelClockBackEnd, |
| double ForcedOutputLinkBPP, |
| unsigned int DSCInputBitPerComponent, |
| unsigned int NumberOfDSCSlices, |
| double AudioSampleRate, |
| unsigned int AudioSampleLayout, |
| enum odm_combine_mode ODMModeNoDSC, |
| enum odm_combine_mode ODMModeDSC, |
| bool DSCEnable, |
| unsigned int OutputLinkDPLanes, |
| enum dm_output_link_dp_rate OutputLinkDPRate, |
| |
| /* Output */ |
| bool *RequiresDSC, |
| double *RequiresFEC, |
| double *OutBpp, |
| enum dm_output_type *OutputType, |
| enum dm_output_rate *OutputRate, |
| unsigned int *RequiredSlots) |
| { |
| bool LinkDSCEnable; |
| unsigned int dummy; |
| *RequiresDSC = false; |
| *RequiresFEC = false; |
| *OutBpp = 0; |
| *OutputType = dm_output_type_unknown; |
| *OutputRate = dm_output_rate_unknown; |
| |
| if (IsMainSurfaceUsingTheIndicatedTiming) { |
| if (Output == dm_hdmi) { |
| *RequiresDSC = false; |
| *RequiresFEC = false; |
| *OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive, |
| PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat, |
| DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, |
| ODMModeNoDSC, ODMModeDSC, &dummy); |
| //OutputTypeAndRate = "HDMI"; |
| *OutputType = dm_output_type_hdmi; |
| |
| } else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) { |
| if (DSCEnable == true) { |
| *RequiresDSC = true; |
| LinkDSCEnable = true; |
| if (Output == dm_dp || Output == dm_dp2p0) |
| *RequiresFEC = true; |
| else |
| *RequiresFEC = false; |
| } else { |
| *RequiresDSC = false; |
| LinkDSCEnable = false; |
| if (Output == dm_dp2p0) |
| *RequiresFEC = true; |
| else |
| *RequiresFEC = false; |
| } |
| if (Output == dm_dp2p0) { |
| *OutBpp = 0; |
| if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) && |
| PHYCLKD32PerState >= 10000 / 32) { |
| *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000, |
| OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, |
| ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, |
| DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, |
| AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); |
| if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true && |
| ForcedOutputLinkBPP == 0) { |
| *RequiresDSC = true; |
| LinkDSCEnable = true; |
| *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000, |
| OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, |
| ForcedOutputLinkBPP, LinkDSCEnable, Output, |
| OutputFormat, DSCInputBitPerComponent, |
| NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, |
| ODMModeNoDSC, ODMModeDSC, RequiredSlots); |
| } |
| //OutputTypeAndRate = Output & " UHBR10"; |
| *OutputType = dm_output_type_dp2p0; |
| *OutputRate = dm_output_rate_dp_rate_uhbr10; |
| } |
| if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) && |
| *OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) { |
| *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500, |
| OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, |
| ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, |
| DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, |
| AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); |
| |
| if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true && |
| ForcedOutputLinkBPP == 0) { |
| *RequiresDSC = true; |
| LinkDSCEnable = true; |
| *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500, |
| OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, |
| ForcedOutputLinkBPP, LinkDSCEnable, Output, |
| OutputFormat, DSCInputBitPerComponent, |
| NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, |
| ODMModeNoDSC, ODMModeDSC, RequiredSlots); |
| } |
| //OutputTypeAndRate = Output & " UHBR13p5"; |
| *OutputType = dm_output_type_dp2p0; |
| *OutputRate = dm_output_rate_dp_rate_uhbr13p5; |
| } |
| if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) && |
| *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) { |
| *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000, |
| OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, |
| ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, |
| DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, |
| AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); |
| if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) { |
| *RequiresDSC = true; |
| LinkDSCEnable = true; |
| *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000, |
| OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, |
| ForcedOutputLinkBPP, LinkDSCEnable, Output, |
| OutputFormat, DSCInputBitPerComponent, |
| NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, |
| ODMModeNoDSC, ODMModeDSC, RequiredSlots); |
| } |
| //OutputTypeAndRate = Output & " UHBR20"; |
| *OutputType = dm_output_type_dp2p0; |
| *OutputRate = dm_output_rate_dp_rate_uhbr20; |
| } |
| } else { |
| *OutBpp = 0; |
| if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) && |
| PHYCLKPerState >= 270) { |
| *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700, |
| OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, |
| ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, |
| DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, |
| AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); |
| if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true && |
| ForcedOutputLinkBPP == 0) { |
| *RequiresDSC = true; |
| LinkDSCEnable = true; |
| if (Output == dm_dp) |
| *RequiresFEC = true; |
| *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700, |
| OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, |
| ForcedOutputLinkBPP, LinkDSCEnable, Output, |
| OutputFormat, DSCInputBitPerComponent, |
| NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, |
| ODMModeNoDSC, ODMModeDSC, RequiredSlots); |
| } |
| //OutputTypeAndRate = Output & " HBR"; |
| *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp; |
| *OutputRate = dm_output_rate_dp_rate_hbr; |
| } |
| if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) && |
| *OutBpp == 0 && PHYCLKPerState >= 540) { |
| *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400, |
| OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, |
| ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, |
| DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, |
| AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); |
| |
| if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true && |
| ForcedOutputLinkBPP == 0) { |
| *RequiresDSC = true; |
| LinkDSCEnable = true; |
| if (Output == dm_dp) |
| *RequiresFEC = true; |
| |
| *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400, |
| OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, |
| ForcedOutputLinkBPP, LinkDSCEnable, Output, |
| OutputFormat, DSCInputBitPerComponent, |
| NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, |
| ODMModeNoDSC, ODMModeDSC, RequiredSlots); |
| } |
| //OutputTypeAndRate = Output & " HBR2"; |
| *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp; |
| *OutputRate = dm_output_rate_dp_rate_hbr2; |
| } |
| if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) { |
| *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100, |
| OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, |
| ForcedOutputLinkBPP, LinkDSCEnable, Output, |
| OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, |
| AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, |
| RequiredSlots); |
| |
| if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) { |
| *RequiresDSC = true; |
| LinkDSCEnable = true; |
| if (Output == dm_dp) |
| *RequiresFEC = true; |
| |
| *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100, |
| OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, |
| ForcedOutputLinkBPP, LinkDSCEnable, Output, |
| OutputFormat, DSCInputBitPerComponent, |
| NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, |
| ODMModeNoDSC, ODMModeDSC, RequiredSlots); |
| } |
| //OutputTypeAndRate = Output & " HBR3"; |
| *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp; |
| *OutputRate = dm_output_rate_dp_rate_hbr3; |
| } |
| } |
| } |
| } |
| } |
| |
| void dml32_CalculateDPPCLK( |
| unsigned int NumberOfActiveSurfaces, |
| double DISPCLKDPPCLKDSCCLKDownSpreading, |
| double DISPCLKDPPCLKVCOSpeed, |
| double DPPCLKUsingSingleDPP[], |
| unsigned int DPPPerSurface[], |
| |
| /* output */ |
| double *GlobalDPPCLK, |
| double Dppclk[]) |
| { |
| unsigned int k; |
| *GlobalDPPCLK = 0; |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| Dppclk[k] = DPPCLKUsingSingleDPP[k] / DPPPerSurface[k] * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100); |
| *GlobalDPPCLK = dml_max(*GlobalDPPCLK, Dppclk[k]); |
| } |
| *GlobalDPPCLK = dml32_RoundToDFSGranularity(*GlobalDPPCLK, 1, DISPCLKDPPCLKVCOSpeed); |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) |
| Dppclk[k] = *GlobalDPPCLK / 255 * dml_ceil(Dppclk[k] * 255.0 / *GlobalDPPCLK, 1.0); |
| } |
| |
| double dml32_TruncToValidBPP( |
| double LinkBitRate, |
| unsigned int Lanes, |
| unsigned int HTotal, |
| unsigned int HActive, |
| double PixelClock, |
| double DesiredBPP, |
| bool DSCEnable, |
| enum output_encoder_class Output, |
| enum output_format_class Format, |
| unsigned int DSCInputBitPerComponent, |
| unsigned int DSCSlices, |
| unsigned int AudioRate, |
| unsigned int AudioLayout, |
| enum odm_combine_mode ODMModeNoDSC, |
| enum odm_combine_mode ODMModeDSC, |
| /* Output */ |
| unsigned int *RequiredSlots) |
| { |
| double MaxLinkBPP; |
| unsigned int MinDSCBPP; |
| double MaxDSCBPP; |
| unsigned int NonDSCBPP0; |
| unsigned int NonDSCBPP1; |
| unsigned int NonDSCBPP2; |
| |
| if (Format == dm_420) { |
| NonDSCBPP0 = 12; |
| NonDSCBPP1 = 15; |
| NonDSCBPP2 = 18; |
| MinDSCBPP = 6; |
| MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16; |
| } else if (Format == dm_444) { |
| NonDSCBPP0 = 24; |
| NonDSCBPP1 = 30; |
| NonDSCBPP2 = 36; |
| MinDSCBPP = 8; |
| MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16; |
| } else { |
| if (Output == dm_hdmi) { |
| NonDSCBPP0 = 24; |
| NonDSCBPP1 = 24; |
| NonDSCBPP2 = 24; |
| } else { |
| NonDSCBPP0 = 16; |
| NonDSCBPP1 = 20; |
| NonDSCBPP2 = 24; |
| } |
| if (Format == dm_n422) { |
| MinDSCBPP = 7; |
| MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0; |
| } else { |
| MinDSCBPP = 8; |
| MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0; |
| } |
| } |
| if (Output == dm_dp2p0) { |
| MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540; |
| } else if (DSCEnable && Output == dm_dp) { |
| MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100); |
| } else { |
| MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock; |
| } |
| |
| if (DSCEnable) { |
| if (ODMModeDSC == dm_odm_combine_mode_4to1) |
| MaxLinkBPP = dml_min(MaxLinkBPP, 16); |
| else if (ODMModeDSC == dm_odm_combine_mode_2to1) |
| MaxLinkBPP = dml_min(MaxLinkBPP, 32); |
| else if (ODMModeDSC == dm_odm_split_mode_1to2) |
| MaxLinkBPP = 2 * MaxLinkBPP; |
| } else { |
| if (ODMModeNoDSC == dm_odm_combine_mode_4to1) |
| MaxLinkBPP = dml_min(MaxLinkBPP, 16); |
| else if (ODMModeNoDSC == dm_odm_combine_mode_2to1) |
| MaxLinkBPP = dml_min(MaxLinkBPP, 32); |
| else if (ODMModeNoDSC == dm_odm_split_mode_1to2) |
| MaxLinkBPP = 2 * MaxLinkBPP; |
| } |
| |
| if (DesiredBPP == 0) { |
| if (DSCEnable) { |
| if (MaxLinkBPP < MinDSCBPP) |
| return BPP_INVALID; |
| else if (MaxLinkBPP >= MaxDSCBPP) |
| return MaxDSCBPP; |
| else |
| return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0; |
| } else { |
| if (MaxLinkBPP >= NonDSCBPP2) |
| return NonDSCBPP2; |
| else if (MaxLinkBPP >= NonDSCBPP1) |
| return NonDSCBPP1; |
| else if (MaxLinkBPP >= NonDSCBPP0) |
| return 16.0; |
| else |
| return BPP_INVALID; |
| } |
| } else { |
| if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || |
| DesiredBPP <= NonDSCBPP0)) || |
| (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) |
| return BPP_INVALID; |
| else |
| return DesiredBPP; |
| } |
| |
| *RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1); |
| |
| return BPP_INVALID; |
| } // TruncToValidBPP |
| |
| double dml32_RequiredDTBCLK( |
| bool DSCEnable, |
| double PixelClock, |
| enum output_format_class OutputFormat, |
| double OutputBpp, |
| unsigned int DSCSlices, |
| unsigned int HTotal, |
| unsigned int HActive, |
| unsigned int AudioRate, |
| unsigned int AudioLayout) |
| { |
| double PixelWordRate; |
| double HCActive; |
| double HCBlank; |
| double AverageTribyteRate; |
| double HActiveTribyteRate; |
| |
| if (DSCEnable != true) |
| return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0); |
| |
| PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2); |
| HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp * |
| dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1); |
| HCBlank = 64 + 32 * |
| dml_ceil(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1); |
| AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal; |
| HActiveTribyteRate = PixelWordRate * HCActive / HActive; |
| return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002; |
| } |
| |
| unsigned int dml32_DSCDelayRequirement(bool DSCEnabled, |
| enum odm_combine_mode ODMMode, |
| unsigned int DSCInputBitPerComponent, |
| double OutputBpp, |
| unsigned int HActive, |
| unsigned int HTotal, |
| unsigned int NumberOfDSCSlices, |
| enum output_format_class OutputFormat, |
| enum output_encoder_class Output, |
| double PixelClock, |
| double PixelClockBackEnd, |
| double dsc_delay_factor_wa) |
| { |
| unsigned int DSCDelayRequirement_val; |
| |
| if (DSCEnabled == true && OutputBpp != 0) { |
| if (ODMMode == dm_odm_combine_mode_4to1) { |
| DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, |
| dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4, |
| OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output)); |
| } else if (ODMMode == dm_odm_combine_mode_2to1) { |
| DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, |
| dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2, |
| OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output)); |
| } else { |
| DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, |
| dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices, |
| OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output); |
| } |
| |
| DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) * |
| dml_ceil((double)DSCDelayRequirement_val / HActive, 1); |
| |
| DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd; |
| |
| } else { |
| DSCDelayRequirement_val = 0; |
| } |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: DSCEnabled = %d\n", __func__, DSCEnabled); |
| dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp); |
| dml_print("DML::%s: HActive = %d\n", __func__, HActive); |
| dml_print("DML::%s: OutputFormat = %d\n", __func__, OutputFormat); |
| dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent); |
| dml_print("DML::%s: NumberOfDSCSlices = %d\n", __func__, NumberOfDSCSlices); |
| dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val); |
| #endif |
| |
| return dml_ceil(DSCDelayRequirement_val * dsc_delay_factor_wa, 1); |
| } |
| |
| void dml32_CalculateSurfaceSizeInMall( |
| unsigned int NumberOfActiveSurfaces, |
| unsigned int MALLAllocatedForDCN, |
| enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[], |
| enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[], |
| bool DCCEnable[], |
| bool ViewportStationary[], |
| unsigned int ViewportXStartY[], |
| unsigned int ViewportYStartY[], |
| unsigned int ViewportXStartC[], |
| unsigned int ViewportYStartC[], |
| unsigned int ViewportWidthY[], |
| unsigned int ViewportHeightY[], |
| unsigned int BytesPerPixelY[], |
| unsigned int ViewportWidthC[], |
| unsigned int ViewportHeightC[], |
| unsigned int BytesPerPixelC[], |
| unsigned int SurfaceWidthY[], |
| unsigned int SurfaceWidthC[], |
| unsigned int SurfaceHeightY[], |
| unsigned int SurfaceHeightC[], |
| unsigned int Read256BytesBlockWidthY[], |
| unsigned int Read256BytesBlockWidthC[], |
| unsigned int Read256BytesBlockHeightY[], |
| unsigned int Read256BytesBlockHeightC[], |
| unsigned int ReadBlockWidthY[], |
| unsigned int ReadBlockWidthC[], |
| unsigned int ReadBlockHeightY[], |
| unsigned int ReadBlockHeightC[], |
| unsigned int DCCMetaPitchY[], |
| unsigned int DCCMetaPitchC[], |
| |
| /* Output */ |
| unsigned int SurfaceSizeInMALL[], |
| bool *ExceededMALLSize) |
| { |
| unsigned int k; |
| unsigned int TotalSurfaceSizeInMALLForSS = 0; |
| unsigned int TotalSurfaceSizeInMALLForSubVP = 0; |
| unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 1024; |
| |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| if (ViewportStationary[k]) { |
| SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]), |
| dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1, |
| ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k], |
| ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k], |
| ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] + |
| ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) - |
| dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k]; |
| |
| if (ReadBlockWidthC[k] > 0) { |
| SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + |
| dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]), |
| dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + |
| ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) - |
| dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) * |
| dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]), |
| dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + |
| ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) - |
| dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) * |
| BytesPerPixelC[k]; |
| } |
| if (DCCEnable[k] == true) { |
| SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + |
| (dml_min(dml_ceil(DCCMetaPitchY[k], 8 * Read256BytesBlockWidthY[k]), |
| dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 * |
| Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k]) |
| - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k])) |
| * dml_min(dml_ceil(SurfaceHeightY[k], 8 * |
| Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] + |
| ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 * |
| Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8 * |
| Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256) + (64 * 1024); |
| if (Read256BytesBlockWidthC[k] > 0) { |
| SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + |
| dml_min(dml_ceil(DCCMetaPitchC[k], 8 * |
| Read256BytesBlockWidthC[k]), |
| dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8 |
| * Read256BytesBlockWidthC[k] - 1, 8 * |
| Read256BytesBlockWidthC[k]) - |
| dml_floor(ViewportXStartC[k], 8 * |
| Read256BytesBlockWidthC[k])) * |
| dml_min(dml_ceil(SurfaceHeightC[k], 8 * |
| Read256BytesBlockHeightC[k]), |
| dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + |
| 8 * Read256BytesBlockHeightC[k] - 1, 8 * |
| Read256BytesBlockHeightC[k]) - |
| dml_floor(ViewportYStartC[k], 8 * |
| Read256BytesBlockHeightC[k])) * |
| BytesPerPixelC[k] / 256; |
| } |
| } |
| } else { |
| SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + |
| ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) * |
| dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + |
| ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * |
| BytesPerPixelY[k]; |
| if (ReadBlockWidthC[k] > 0) { |
| SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + |
| dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + |
| ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) * |
| dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + |
| ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * |
| BytesPerPixelC[k]; |
| } |
| if (DCCEnable[k] == true) { |
| SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + |
| (dml_ceil(dml_min(DCCMetaPitchY[k], ViewportWidthY[k] + 8 * |
| Read256BytesBlockWidthY[k] - 1), 8 * |
| Read256BytesBlockWidthY[k]) * |
| dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 * |
| Read256BytesBlockHeightY[k] - 1), 8 * |
| Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256) + (64 * 1024); |
| |
| if (Read256BytesBlockWidthC[k] > 0) { |
| SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + |
| dml_ceil(dml_min(DCCMetaPitchC[k], ViewportWidthC[k] + 8 * |
| Read256BytesBlockWidthC[k] - 1), 8 * |
| Read256BytesBlockWidthC[k]) * |
| dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 * |
| Read256BytesBlockHeightC[k] - 1), 8 * |
| Read256BytesBlockHeightC[k]) * |
| BytesPerPixelC[k] / 256; |
| } |
| } |
| } |
| } |
| |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| /* SS and Subvp counted separate as they are never used at the same time */ |
| if (UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) |
| TotalSurfaceSizeInMALLForSubVP = TotalSurfaceSizeInMALLForSubVP + SurfaceSizeInMALL[k]; |
| else if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable) |
| TotalSurfaceSizeInMALLForSS = TotalSurfaceSizeInMALLForSS + SurfaceSizeInMALL[k]; |
| } |
| *ExceededMALLSize = (TotalSurfaceSizeInMALLForSS > MALLAllocatedForDCNInBytes) || |
| (TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes); |
| } // CalculateSurfaceSizeInMall |
| |
| void dml32_CalculateVMRowAndSwath( |
| unsigned int NumberOfActiveSurfaces, |
| DmlPipe myPipe[], |
| unsigned int SurfaceSizeInMALL[], |
| unsigned int PTEBufferSizeInRequestsLuma, |
| unsigned int PTEBufferSizeInRequestsChroma, |
| unsigned int DCCMetaBufferSizeBytes, |
| enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[], |
| enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], |
| unsigned int MALLAllocatedForDCN, |
| double SwathWidthY[], |
| double SwathWidthC[], |
| bool GPUVMEnable, |
| bool HostVMEnable, |
| unsigned int HostVMMaxNonCachedPageTableLevels, |
| unsigned int GPUVMMaxPageTableLevels, |
| unsigned int GPUVMMinPageSizeKBytes[], |
| unsigned int HostVMMinPageSize, |
| |
| /* Output */ |
| bool PTEBufferSizeNotExceeded[], |
| bool DCCMetaBufferSizeNotExceeded[], |
| unsigned int dpte_row_width_luma_ub[], |
| unsigned int dpte_row_width_chroma_ub[], |
| unsigned int dpte_row_height_luma[], |
| unsigned int dpte_row_height_chroma[], |
| unsigned int dpte_row_height_linear_luma[], // VBA_DELTA |
| unsigned int dpte_row_height_linear_chroma[], // VBA_DELTA |
| unsigned int meta_req_width[], |
| unsigned int meta_req_width_chroma[], |
| unsigned int meta_req_height[], |
| unsigned int meta_req_height_chroma[], |
| unsigned int meta_row_width[], |
| unsigned int meta_row_width_chroma[], |
| unsigned int meta_row_height[], |
| unsigned int meta_row_height_chroma[], |
| unsigned int vm_group_bytes[], |
| unsigned int dpte_group_bytes[], |
| unsigned int PixelPTEReqWidthY[], |
| unsigned int PixelPTEReqHeightY[], |
| unsigned int PTERequestSizeY[], |
| unsigned int PixelPTEReqWidthC[], |
| unsigned int PixelPTEReqHeightC[], |
| unsigned int PTERequestSizeC[], |
| unsigned int dpde0_bytes_per_frame_ub_l[], |
| unsigned int meta_pte_bytes_per_frame_ub_l[], |
| unsigned int dpde0_bytes_per_frame_ub_c[], |
| unsigned int meta_pte_bytes_per_frame_ub_c[], |
| double PrefetchSourceLinesY[], |
| double PrefetchSourceLinesC[], |
| double VInitPreFillY[], |
| double VInitPreFillC[], |
| unsigned int MaxNumSwathY[], |
| unsigned int MaxNumSwathC[], |
| double meta_row_bw[], |
| double dpte_row_bw[], |
| double PixelPTEBytesPerRow[], |
| double PDEAndMetaPTEBytesFrame[], |
| double MetaRowByte[], |
| bool use_one_row_for_frame[], |
| bool use_one_row_for_frame_flip[], |
| bool UsesMALLForStaticScreen[], |
| bool PTE_BUFFER_MODE[], |
| unsigned int BIGK_FRAGMENT_SIZE[]) |
| { |
| unsigned int k; |
| unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX]; |
| unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX]; |
| unsigned int PDEAndMetaPTEBytesFrameY; |
| unsigned int PDEAndMetaPTEBytesFrameC; |
| unsigned int MetaRowByteY[DC__NUM_DPP__MAX]; |
| unsigned int MetaRowByteC[DC__NUM_DPP__MAX]; |
| unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX]; |
| unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX]; |
| unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX]; |
| unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX]; |
| unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; |
| unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX]; |
| unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; |
| unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX]; |
| bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX]; |
| |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| if (HostVMEnable == true) { |
| vm_group_bytes[k] = 512; |
| dpte_group_bytes[k] = 512; |
| } else if (GPUVMEnable == true) { |
| vm_group_bytes[k] = 2048; |
| if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation)) |
| dpte_group_bytes[k] = 512; |
| else |
| dpte_group_bytes[k] = 2048; |
| } else { |
| vm_group_bytes[k] = 0; |
| dpte_group_bytes[k] = 0; |
| } |
| |
| if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 || |
| myPipe[k].SourcePixelFormat == dm_420_12 || |
| myPipe[k].SourcePixelFormat == dm_rgbe_alpha) { |
| if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) && |
| !IsVertical(myPipe[k].SourceRotation)) { |
| PTEBufferSizeInRequestsForLuma[k] = |
| (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2; |
| PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k]; |
| } else { |
| PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma; |
| PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma; |
| } |
| |
| PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes( |
| myPipe[k].ViewportStationary, |
| myPipe[k].DCCEnable, |
| myPipe[k].DPPPerSurface, |
| myPipe[k].BlockHeight256BytesC, |
| myPipe[k].BlockWidth256BytesC, |
| myPipe[k].SourcePixelFormat, |
| myPipe[k].SurfaceTiling, |
| myPipe[k].BytePerPixelC, |
| myPipe[k].SourceRotation, |
| SwathWidthC[k], |
| myPipe[k].ViewportHeightChroma, |
| myPipe[k].ViewportXStartC, |
| myPipe[k].ViewportYStartC, |
| GPUVMEnable, |
| HostVMEnable, |
| HostVMMaxNonCachedPageTableLevels, |
| GPUVMMaxPageTableLevels, |
| GPUVMMinPageSizeKBytes[k], |
| HostVMMinPageSize, |
| PTEBufferSizeInRequestsForChroma[k], |
| myPipe[k].PitchC, |
| myPipe[k].DCCMetaPitchC, |
| myPipe[k].BlockWidthC, |
| myPipe[k].BlockHeightC, |
| |
| /* Output */ |
| &MetaRowByteC[k], |
| &PixelPTEBytesPerRowC[k], |
| &dpte_row_width_chroma_ub[k], |
| &dpte_row_height_chroma[k], |
| &dpte_row_height_linear_chroma[k], |
| &PixelPTEBytesPerRowC_one_row_per_frame[k], |
| &dpte_row_width_chroma_ub_one_row_per_frame[k], |
| &dpte_row_height_chroma_one_row_per_frame[k], |
| &meta_req_width_chroma[k], |
| &meta_req_height_chroma[k], |
| &meta_row_width_chroma[k], |
| &meta_row_height_chroma[k], |
| &PixelPTEReqWidthC[k], |
| &PixelPTEReqHeightC[k], |
| &PTERequestSizeC[k], |
| &dpde0_bytes_per_frame_ub_c[k], |
| &meta_pte_bytes_per_frame_ub_c[k]); |
| |
| PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines( |
| myPipe[k].VRatioChroma, |
| myPipe[k].VTapsChroma, |
| myPipe[k].InterlaceEnable, |
| myPipe[k].ProgressiveToInterlaceUnitInOPP, |
| myPipe[k].SwathHeightC, |
| myPipe[k].SourceRotation, |
| myPipe[k].ViewportStationary, |
| SwathWidthC[k], |
| myPipe[k].ViewportHeightChroma, |
| myPipe[k].ViewportXStartC, |
| myPipe[k].ViewportYStartC, |
| |
| /* Output */ |
| &VInitPreFillC[k], |
| &MaxNumSwathC[k]); |
| } else { |
| PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma; |
| PTEBufferSizeInRequestsForChroma[k] = 0; |
| PixelPTEBytesPerRowC[k] = 0; |
| PDEAndMetaPTEBytesFrameC = 0; |
| MetaRowByteC[k] = 0; |
| MaxNumSwathC[k] = 0; |
| PrefetchSourceLinesC[k] = 0; |
| dpte_row_height_chroma_one_row_per_frame[k] = 0; |
| dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; |
| PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; |
| } |
| |
| PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes( |
| myPipe[k].ViewportStationary, |
| myPipe[k].DCCEnable, |
| myPipe[k].DPPPerSurface, |
| myPipe[k].BlockHeight256BytesY, |
| myPipe[k].BlockWidth256BytesY, |
| myPipe[k].SourcePixelFormat, |
| myPipe[k].SurfaceTiling, |
| myPipe[k].BytePerPixelY, |
| myPipe[k].SourceRotation, |
| SwathWidthY[k], |
| myPipe[k].ViewportHeight, |
| myPipe[k].ViewportXStart, |
| myPipe[k].ViewportYStart, |
| GPUVMEnable, |
| HostVMEnable, |
| HostVMMaxNonCachedPageTableLevels, |
| GPUVMMaxPageTableLevels, |
| GPUVMMinPageSizeKBytes[k], |
| HostVMMinPageSize, |
| PTEBufferSizeInRequestsForLuma[k], |
| myPipe[k].PitchY, |
| myPipe[k].DCCMetaPitchY, |
| myPipe[k].BlockWidthY, |
| myPipe[k].BlockHeightY, |
| |
| /* Output */ |
| &MetaRowByteY[k], |
| &PixelPTEBytesPerRowY[k], |
| &dpte_row_width_luma_ub[k], |
| &dpte_row_height_luma[k], |
| &dpte_row_height_linear_luma[k], |
| &PixelPTEBytesPerRowY_one_row_per_frame[k], |
| &dpte_row_width_luma_ub_one_row_per_frame[k], |
| &dpte_row_height_luma_one_row_per_frame[k], |
| &meta_req_width[k], |
| &meta_req_height[k], |
| &meta_row_width[k], |
| &meta_row_height[k], |
| &PixelPTEReqWidthY[k], |
| &PixelPTEReqHeightY[k], |
| &PTERequestSizeY[k], |
| &dpde0_bytes_per_frame_ub_l[k], |
| &meta_pte_bytes_per_frame_ub_l[k]); |
| |
| PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines( |
| myPipe[k].VRatio, |
| myPipe[k].VTaps, |
| myPipe[k].InterlaceEnable, |
| myPipe[k].ProgressiveToInterlaceUnitInOPP, |
| myPipe[k].SwathHeightY, |
| myPipe[k].SourceRotation, |
| myPipe[k].ViewportStationary, |
| SwathWidthY[k], |
| myPipe[k].ViewportHeight, |
| myPipe[k].ViewportXStart, |
| myPipe[k].ViewportYStart, |
| |
| /* Output */ |
| &VInitPreFillY[k], |
| &MaxNumSwathY[k]); |
| |
| PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC; |
| MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k]; |
| |
| if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] && |
| PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) { |
| PTEBufferSizeNotExceeded[k] = true; |
| } else { |
| PTEBufferSizeNotExceeded[k] = false; |
| } |
| |
| one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * |
| PTEBufferSizeInRequestsForLuma[k] && |
| PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]); |
| } |
| |
| dml32_CalculateMALLUseForStaticScreen( |
| NumberOfActiveSurfaces, |
| MALLAllocatedForDCN, |
| UseMALLForStaticScreen, // mode |
| SurfaceSizeInMALL, |
| one_row_per_frame_fits_in_buffer, |
| /* Output */ |
| UsesMALLForStaticScreen); // boolen |
| |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] || |
| (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) || |
| (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) || |
| (GPUVMMinPageSizeKBytes[k] > 64); |
| BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12; |
| } |
| |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n", __func__, k, SurfaceSizeInMALL[k]); |
| dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]); |
| #endif |
| use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] || |
| (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) || |
| (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) || |
| (GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation)); |
| |
| use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] && |
| !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame); |
| |
| if (use_one_row_for_frame[k]) { |
| dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k]; |
| dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k]; |
| PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k]; |
| dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k]; |
| dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k]; |
| PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k]; |
| PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k]; |
| } |
| |
| if (MetaRowByte[k] <= DCCMetaBufferSizeBytes) |
| DCCMetaBufferSizeNotExceeded[k] = true; |
| else |
| DCCMetaBufferSizeNotExceeded[k] = false; |
| |
| PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k]; |
| if (use_one_row_for_frame[k]) |
| PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2; |
| |
| dml32_CalculateRowBandwidth( |
| GPUVMEnable, |
| myPipe[k].SourcePixelFormat, |
| myPipe[k].VRatio, |
| myPipe[k].VRatioChroma, |
| myPipe[k].DCCEnable, |
| myPipe[k].HTotal / myPipe[k].PixelClock, |
| MetaRowByteY[k], MetaRowByteC[k], |
| meta_row_height[k], |
| meta_row_height_chroma[k], |
| PixelPTEBytesPerRowY[k], |
| PixelPTEBytesPerRowC[k], |
| dpte_row_height_luma[k], |
| dpte_row_height_chroma[k], |
| |
| /* Output */ |
| &meta_row_bw[k], |
| &dpte_row_bw[k]); |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%d, use_one_row_for_frame = %d\n", __func__, k, use_one_row_for_frame[k]); |
| dml_print("DML::%s: k=%d, use_one_row_for_frame_flip = %d\n", |
| __func__, k, use_one_row_for_frame_flip[k]); |
| dml_print("DML::%s: k=%d, UseMALLForPStateChange = %d\n", |
| __func__, k, UseMALLForPStateChange[k]); |
| dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]); |
| dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n", |
| __func__, k, dpte_row_width_luma_ub[k]); |
| dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, PixelPTEBytesPerRowY[k]); |
| dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n", |
| __func__, k, dpte_row_height_chroma[k]); |
| dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n", |
| __func__, k, dpte_row_width_chroma_ub[k]); |
| dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, PixelPTEBytesPerRowC[k]); |
| dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]); |
| dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n", |
| __func__, k, PTEBufferSizeNotExceeded[k]); |
| dml_print("DML::%s: k=%d, PTE_BUFFER_MODE = %d\n", __func__, k, PTE_BUFFER_MODE[k]); |
| dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]); |
| #endif |
| } |
| } // CalculateVMRowAndSwath |
| |
| unsigned int dml32_CalculateVMAndRowBytes( |
| bool ViewportStationary, |
| bool DCCEnable, |
| unsigned int NumberOfDPPs, |
| unsigned int BlockHeight256Bytes, |
| unsigned int BlockWidth256Bytes, |
| enum source_format_class SourcePixelFormat, |
| unsigned int SurfaceTiling, |
| unsigned int BytePerPixel, |
| enum dm_rotation_angle SourceRotation, |
| double SwathWidth, |
| unsigned int ViewportHeight, |
| unsigned int ViewportXStart, |
| unsigned int ViewportYStart, |
| bool GPUVMEnable, |
| bool HostVMEnable, |
| unsigned int HostVMMaxNonCachedPageTableLevels, |
| unsigned int GPUVMMaxPageTableLevels, |
| unsigned int GPUVMMinPageSizeKBytes, |
| unsigned int HostVMMinPageSize, |
| unsigned int PTEBufferSizeInRequests, |
| unsigned int Pitch, |
| unsigned int DCCMetaPitch, |
| unsigned int MacroTileWidth, |
| unsigned int MacroTileHeight, |
| |
| /* Output */ |
| unsigned int *MetaRowByte, |
| unsigned int *PixelPTEBytesPerRow, |
| unsigned int *dpte_row_width_ub, |
| unsigned int *dpte_row_height, |
| unsigned int *dpte_row_height_linear, |
| unsigned int *PixelPTEBytesPerRow_one_row_per_frame, |
| unsigned int *dpte_row_width_ub_one_row_per_frame, |
| unsigned int *dpte_row_height_one_row_per_frame, |
| unsigned int *MetaRequestWidth, |
| unsigned int *MetaRequestHeight, |
| unsigned int *meta_row_width, |
| unsigned int *meta_row_height, |
| unsigned int *PixelPTEReqWidth, |
| unsigned int *PixelPTEReqHeight, |
| unsigned int *PTERequestSize, |
| unsigned int *DPDE0BytesFrame, |
| unsigned int *MetaPTEBytesFrame) |
| { |
| unsigned int MPDEBytesFrame; |
| unsigned int DCCMetaSurfaceBytes; |
| unsigned int ExtraDPDEBytesFrame; |
| unsigned int PDEAndMetaPTEBytesFrame; |
| unsigned int HostVMDynamicLevels = 0; |
| unsigned int MacroTileSizeBytes; |
| unsigned int vp_height_meta_ub; |
| unsigned int vp_height_dpte_ub; |
| unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this |
| |
| if (GPUVMEnable == true && HostVMEnable == true) { |
| if (HostVMMinPageSize < 2048) |
| HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; |
| else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) |
| HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); |
| else |
| HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); |
| } |
| |
| *MetaRequestHeight = 8 * BlockHeight256Bytes; |
| *MetaRequestWidth = 8 * BlockWidth256Bytes; |
| if (SurfaceTiling == dm_sw_linear) { |
| *meta_row_height = 32; |
| *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth) |
| - dml_floor(ViewportXStart, *MetaRequestWidth); |
| } else if (!IsVertical(SourceRotation)) { |
| *meta_row_height = *MetaRequestHeight; |
| if (ViewportStationary && NumberOfDPPs == 1) { |
| *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, |
| *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth); |
| } else { |
| *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth; |
| } |
| *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0; |
| } else { |
| *meta_row_height = *MetaRequestWidth; |
| if (ViewportStationary && NumberOfDPPs == 1) { |
| *meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1, |
| *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight); |
| } else { |
| *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight; |
| } |
| *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0; |
| } |
| |
| if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) { |
| vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1, |
| 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes); |
| } else if (!IsVertical(SourceRotation)) { |
| vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes; |
| } else { |
| vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes; |
| } |
| |
| DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0; |
| |
| if (GPUVMEnable == true) { |
| *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / |
| (8 * 4.0 * 1024), 1) + 1) * 64; |
| MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1); |
| } else { |
| *MetaPTEBytesFrame = 0; |
| MPDEBytesFrame = 0; |
| } |
| |
| if (DCCEnable != true) { |
| *MetaPTEBytesFrame = 0; |
| MPDEBytesFrame = 0; |
| *MetaRowByte = 0; |
| } |
| |
| MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight; |
| |
| if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) { |
| if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) { |
| vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight + |
| MacroTileHeight - 1, MacroTileHeight) - |
| dml_floor(ViewportYStart, MacroTileHeight); |
| } else if (!IsVertical(SourceRotation)) { |
| vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight; |
| } else { |
| vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight; |
| } |
| *DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) / |
| (8 * 2097152), 1) + 1); |
| ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2); |
| } else { |
| *DPDE0BytesFrame = 0; |
| ExtraDPDEBytesFrame = 0; |
| vp_height_dpte_ub = 0; |
| } |
| |
| PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame; |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable); |
| dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); |
| dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear); |
| dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel); |
| dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels); |
| dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes); |
| dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes); |
| dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight); |
| dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth); |
| dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame); |
| dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame); |
| dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame); |
| dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame); |
| dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); |
| dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight); |
| dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth); |
| dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub); |
| #endif |
| |
| if (HostVMEnable == true) |
| PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels); |
| |
| if (SurfaceTiling == dm_sw_linear) { |
| *PixelPTEReqHeight = 1; |
| *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel; |
| PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel; |
| *PTERequestSize = 64; |
| } else if (GPUVMMinPageSizeKBytes == 4) { |
| *PixelPTEReqHeight = 16 * BlockHeight256Bytes; |
| *PixelPTEReqWidth = 16 * BlockWidth256Bytes; |
| *PTERequestSize = 128; |
| } else { |
| *PixelPTEReqHeight = MacroTileHeight; |
| *PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel); |
| *PTERequestSize = 64; |
| } |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes); |
| dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame); |
| dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight); |
| dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth); |
| dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear); |
| dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize); |
| dml_print("DML::%s: Pitch = %d\n", __func__, Pitch); |
| #endif |
| |
| *dpte_row_height_one_row_per_frame = vp_height_dpte_ub; |
| *dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame / |
| (double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) * |
| (double) *PixelPTEReqWidth; |
| *PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth * |
| *PTERequestSize; |
| |
| if (SurfaceTiling == dm_sw_linear) { |
| *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * |
| *PixelPTEReqWidth / Pitch), 1)); |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__, |
| PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch); |
| dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__, |
| dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch)); |
| dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__, |
| dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)); |
| dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__, |
| 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * |
| *PixelPTEReqWidth / Pitch), 1)); |
| dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height); |
| #endif |
| *dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1), |
| (double) *PixelPTEReqWidth) + *PixelPTEReqWidth; |
| *PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize; |
| |
| // VBA_DELTA, VBA doesn't have programming value for pte row height linear. |
| *dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * |
| PixelPTEReqWidth_linear / Pitch), 1); |
| if (*dpte_row_height_linear > 128) |
| *dpte_row_height_linear = 128; |
| |
| } else if (!IsVertical(SourceRotation)) { |
| *dpte_row_height = *PixelPTEReqHeight; |
| |
| if (GPUVMMinPageSizeKBytes > 64) { |
| *dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) / |
| *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; |
| } else if (ViewportStationary && (NumberOfDPPs == 1)) { |
| *dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth + |
| *PixelPTEReqWidth - 1, *PixelPTEReqWidth) - |
| dml_floor(ViewportXStart, *PixelPTEReqWidth); |
| } else { |
| *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * |
| *PixelPTEReqWidth; |
| } |
| |
| *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; |
| } else { |
| *dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth); |
| |
| if (ViewportStationary && (NumberOfDPPs == 1)) { |
| *dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1, |
| *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight); |
| } else { |
| *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) |
| * *PixelPTEReqHeight; |
| } |
| |
| *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize; |
| } |
| |
| if (GPUVMEnable != true) |
| *PixelPTEBytesPerRow = 0; |
| if (HostVMEnable == true) |
| *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels); |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes); |
| dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height); |
| dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear); |
| dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub); |
| dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow); |
| dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests); |
| dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame); |
| dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n", |
| __func__, *dpte_row_width_ub_one_row_per_frame); |
| dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n", |
| __func__, *PixelPTEBytesPerRow_one_row_per_frame); |
| dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", |
| *MetaPTEBytesFrame); |
| #endif |
| |
| return PDEAndMetaPTEBytesFrame; |
| } // CalculateVMAndRowBytes |
| |
| double dml32_CalculatePrefetchSourceLines( |
| double VRatio, |
| unsigned int VTaps, |
| bool Interlace, |
| bool ProgressiveToInterlaceUnitInOPP, |
| unsigned int SwathHeight, |
| enum dm_rotation_angle SourceRotation, |
| bool ViewportStationary, |
| double SwathWidth, |
| unsigned int ViewportHeight, |
| unsigned int ViewportXStart, |
| unsigned int ViewportYStart, |
| |
| /* Output */ |
| double *VInitPreFill, |
| unsigned int *MaxNumSwath) |
| { |
| |
| unsigned int vp_start_rot; |
| unsigned int sw0_tmp; |
| unsigned int MaxPartialSwath; |
| double numLines; |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: VRatio = %f\n", __func__, VRatio); |
| dml_print("DML::%s: VTaps = %d\n", __func__, VTaps); |
| dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart); |
| dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart); |
| dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary); |
| dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight); |
| #endif |
| if (ProgressiveToInterlaceUnitInOPP) |
| *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1); |
| else |
| *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1); |
| |
| if (ViewportStationary) { |
| if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) { |
| vp_start_rot = SwathHeight - |
| (((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1); |
| } else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) { |
| vp_start_rot = ViewportXStart; |
| } else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) { |
| vp_start_rot = SwathHeight - |
| (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1); |
| } else { |
| vp_start_rot = ViewportYStart; |
| } |
| sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight); |
| if (sw0_tmp < *VInitPreFill) |
| *MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1; |
| else |
| *MaxNumSwath = 1; |
| MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight); |
| } else { |
| *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1; |
| if (*VInitPreFill > 1) |
| MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight); |
| else |
| MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight); |
| } |
| numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath; |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot); |
| dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill); |
| dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath); |
| dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath); |
| dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines); |
| #endif |
| return numLines; |
| |
| } // CalculatePrefetchSourceLines |
| |
| void dml32_CalculateMALLUseForStaticScreen( |
| unsigned int NumberOfActiveSurfaces, |
| unsigned int MALLAllocatedForDCNFinal, |
| enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen, |
| unsigned int SurfaceSizeInMALL[], |
| bool one_row_per_frame_fits_in_buffer[], |
| |
| /* output */ |
| bool UsesMALLForStaticScreen[]) |
| { |
| unsigned int k; |
| unsigned int SurfaceToAddToMALL; |
| bool CanAddAnotherSurfaceToMALL; |
| unsigned int TotalSurfaceSizeInMALL; |
| |
| TotalSurfaceSizeInMALL = 0; |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable); |
| if (UsesMALLForStaticScreen[k]) |
| TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]); |
| dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n", __func__, k, TotalSurfaceSizeInMALL); |
| #endif |
| } |
| |
| SurfaceToAddToMALL = 0; |
| CanAddAnotherSurfaceToMALL = true; |
| while (CanAddAnotherSurfaceToMALL) { |
| CanAddAnotherSurfaceToMALL = false; |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 && |
| !UsesMALLForStaticScreen[k] && |
| UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable && |
| one_row_per_frame_fits_in_buffer[k] && |
| (!CanAddAnotherSurfaceToMALL || |
| SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) { |
| CanAddAnotherSurfaceToMALL = true; |
| SurfaceToAddToMALL = k; |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n", |
| __func__, k, UseMALLForStaticScreen[k]); |
| #endif |
| } |
| } |
| if (CanAddAnotherSurfaceToMALL) { |
| UsesMALLForStaticScreen[SurfaceToAddToMALL] = true; |
| TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL]; |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: SurfaceToAddToMALL = %d\n", __func__, SurfaceToAddToMALL); |
| dml_print("DML::%s: TotalSurfaceSizeInMALL = %d\n", __func__, TotalSurfaceSizeInMALL); |
| #endif |
| |
| } |
| } |
| } |
| |
| void dml32_CalculateRowBandwidth( |
| bool GPUVMEnable, |
| enum source_format_class SourcePixelFormat, |
| double VRatio, |
| double VRatioChroma, |
| bool DCCEnable, |
| double LineTime, |
| unsigned int MetaRowByteLuma, |
| unsigned int MetaRowByteChroma, |
| unsigned int meta_row_height_luma, |
| unsigned int meta_row_height_chroma, |
| unsigned int PixelPTEBytesPerRowLuma, |
| unsigned int PixelPTEBytesPerRowChroma, |
| unsigned int dpte_row_height_luma, |
| unsigned int dpte_row_height_chroma, |
| /* Output */ |
| double *meta_row_bw, |
| double *dpte_row_bw) |
| { |
| if (DCCEnable != true) { |
| *meta_row_bw = 0; |
| } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || |
| SourcePixelFormat == dm_rgbe_alpha) { |
| *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * |
| MetaRowByteChroma / (meta_row_height_chroma * LineTime); |
| } else { |
| *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); |
| } |
| |
| if (GPUVMEnable != true) { |
| *dpte_row_bw = 0; |
| } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || |
| SourcePixelFormat == dm_rgbe_alpha) { |
| *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) + |
| VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime); |
| } else { |
| *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); |
| } |
| } |
| |
| double dml32_CalculateUrgentLatency( |
| double UrgentLatencyPixelDataOnly, |
| double UrgentLatencyPixelMixedWithVMData, |
| double UrgentLatencyVMDataOnly, |
| bool DoUrgentLatencyAdjustment, |
| double UrgentLatencyAdjustmentFabricClockComponent, |
| double UrgentLatencyAdjustmentFabricClockReference, |
| double FabricClock) |
| { |
| double ret; |
| |
| ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly); |
| if (DoUrgentLatencyAdjustment == true) { |
| ret = ret + UrgentLatencyAdjustmentFabricClockComponent * |
| (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1); |
| } |
| return ret; |
| } |
| |
| void dml32_CalculateUrgentBurstFactor( |
| enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange, |
| unsigned int swath_width_luma_ub, |
| unsigned int swath_width_chroma_ub, |
| unsigned int SwathHeightY, |
| unsigned int SwathHeightC, |
| double LineTime, |
| double UrgentLatency, |
| double CursorBufferSize, |
| unsigned int CursorWidth, |
| unsigned int CursorBPP, |
| double VRatio, |
| double VRatioC, |
| double BytePerPixelInDETY, |
| double BytePerPixelInDETC, |
| unsigned int DETBufferSizeY, |
| unsigned int DETBufferSizeC, |
| /* Output */ |
| double *UrgentBurstFactorCursor, |
| double *UrgentBurstFactorLuma, |
| double *UrgentBurstFactorChroma, |
| bool *NotEnoughUrgentLatencyHiding) |
| { |
| double LinesInDETLuma; |
| double LinesInDETChroma; |
| unsigned int LinesInCursorBuffer; |
| double CursorBufferSizeInTime; |
| double DETBufferSizeInTimeLuma; |
| double DETBufferSizeInTimeChroma; |
| |
| *NotEnoughUrgentLatencyHiding = 0; |
| |
| if (CursorWidth > 0) { |
| LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / |
| (CursorWidth * CursorBPP / 8.0)), 1.0); |
| if (VRatio > 0) { |
| CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio; |
| if (CursorBufferSizeInTime - UrgentLatency <= 0) { |
| *NotEnoughUrgentLatencyHiding = 1; |
| *UrgentBurstFactorCursor = 0; |
| } else { |
| *UrgentBurstFactorCursor = CursorBufferSizeInTime / |
| (CursorBufferSizeInTime - UrgentLatency); |
| } |
| } else { |
| *UrgentBurstFactorCursor = 1; |
| } |
| } |
| |
| LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 : |
| DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub; |
| |
| if (VRatio > 0) { |
| DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; |
| if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) { |
| *NotEnoughUrgentLatencyHiding = 1; |
| *UrgentBurstFactorLuma = 0; |
| } else { |
| *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency); |
| } |
| } else { |
| *UrgentBurstFactorLuma = 1; |
| } |
| |
| if (BytePerPixelInDETC > 0) { |
| LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? |
| 1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC |
| / swath_width_chroma_ub; |
| |
| if (VRatio > 0) { |
| DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio; |
| if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { |
| *NotEnoughUrgentLatencyHiding = 1; |
| *UrgentBurstFactorChroma = 0; |
| } else { |
| *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma |
| / (DETBufferSizeInTimeChroma - UrgentLatency); |
| } |
| } else { |
| *UrgentBurstFactorChroma = 1; |
| } |
| } |
| } // CalculateUrgentBurstFactor |
| |
| void dml32_CalculateDCFCLKDeepSleep( |
| unsigned int NumberOfActiveSurfaces, |
| unsigned int BytePerPixelY[], |
| unsigned int BytePerPixelC[], |
| double VRatio[], |
| double VRatioChroma[], |
| double SwathWidthY[], |
| double SwathWidthC[], |
| unsigned int DPPPerSurface[], |
| double HRatio[], |
| double HRatioChroma[], |
| double PixelClock[], |
| double PSCL_THROUGHPUT[], |
| double PSCL_THROUGHPUT_CHROMA[], |
| double Dppclk[], |
| double ReadBandwidthLuma[], |
| double ReadBandwidthChroma[], |
| unsigned int ReturnBusWidth, |
| |
| /* Output */ |
| double *DCFClkDeepSleep) |
| { |
| unsigned int k; |
| double DisplayPipeLineDeliveryTimeLuma; |
| double DisplayPipeLineDeliveryTimeChroma; |
| double DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX]; |
| double ReadBandwidth = 0.0; |
| |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| |
| if (VRatio[k] <= 1) { |
| DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k] |
| / PixelClock[k]; |
| } else { |
| DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; |
| } |
| if (BytePerPixelC[k] == 0) { |
| DisplayPipeLineDeliveryTimeChroma = 0; |
| } else { |
| if (VRatioChroma[k] <= 1) { |
| DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * |
| DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; |
| } else { |
| DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] |
| / Dppclk[k]; |
| } |
| } |
| |
| if (BytePerPixelC[k] > 0) { |
| DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * |
| BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, |
| __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / |
| 32.0 / DisplayPipeLineDeliveryTimeChroma); |
| } else { |
| DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / |
| 64.0 / DisplayPipeLineDeliveryTimeLuma; |
| } |
| DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16); |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]); |
| dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); |
| #endif |
| } |
| |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) |
| ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; |
| |
| *DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth); |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__); |
| dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth); |
| dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth); |
| dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep); |
| #endif |
| |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) |
| *DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]); |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep); |
| #endif |
| } // CalculateDCFCLKDeepSleep |
| |
| double dml32_CalculateWriteBackDelay( |
| enum source_format_class WritebackPixelFormat, |
| double WritebackHRatio, |
| double WritebackVRatio, |
| unsigned int WritebackVTaps, |
| unsigned int WritebackDestinationWidth, |
| unsigned int WritebackDestinationHeight, |
| unsigned int WritebackSourceHeight, |
| unsigned int HTotal) |
| { |
| double CalculateWriteBackDelay; |
| double Line_length; |
| double Output_lines_last_notclamped; |
| double WritebackVInit; |
| |
| WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2; |
| Line_length = dml_max((double) WritebackDestinationWidth, |
| dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps); |
| Output_lines_last_notclamped = WritebackDestinationHeight - 1 - |
| dml_ceil(((double)WritebackSourceHeight - |
| (double) WritebackVInit) / (double)WritebackVRatio, 1.0); |
| if (Output_lines_last_notclamped < 0) { |
| CalculateWriteBackDelay = 0; |
| } else { |
| CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + |
| (HTotal - WritebackDestinationWidth) + 80; |
| } |
| return CalculateWriteBackDelay; |
| } |
| |
| void dml32_UseMinimumDCFCLK( |
| enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], |
| bool DRRDisplay[], |
| bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal, |
| unsigned int MaxInterDCNTileRepeaters, |
| unsigned int MaxPrefetchMode, |
| double DRAMClockChangeLatencyFinal, |
| double FCLKChangeLatency, |
| double SREnterPlusExitTime, |
| unsigned int ReturnBusWidth, |
| unsigned int RoundTripPingLatencyCycles, |
| unsigned int ReorderingBytes, |
| unsigned int PixelChunkSizeInKByte, |
| unsigned int MetaChunkSize, |
| bool GPUVMEnable, |
| unsigned int GPUVMMaxPageTableLevels, |
| bool HostVMEnable, |
| unsigned int NumberOfActiveSurfaces, |
| double HostVMMinPageSize, |
| unsigned int HostVMMaxNonCachedPageTableLevels, |
| bool DynamicMetadataVMEnabled, |
| bool ImmediateFlipRequirement, |
| bool ProgressiveToInterlaceUnitInOPP, |
| double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation, |
| double PercentOfIdealSDPPortBWReceivedAfterUrgLatency, |
| unsigned int VTotal[], |
| unsigned int VActive[], |
| unsigned int DynamicMetadataTransmittedBytes[], |
| unsigned int DynamicMetadataLinesBeforeActiveRequired[], |
| bool Interlace[], |
| double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX], |
| double RequiredDISPCLK[][2], |
| double UrgLatency[], |
| unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX], |
| double ProjectedDCFClkDeepSleep[][2], |
| double MaximumVStartup[][2][DC__NUM_DPP__MAX], |
| unsigned int TotalNumberOfActiveDPP[][2], |
| unsigned int TotalNumberOfDCCActiveDPP[][2], |
| unsigned int dpte_group_bytes[], |
| double PrefetchLinesY[][2][DC__NUM_DPP__MAX], |
| double PrefetchLinesC[][2][DC__NUM_DPP__MAX], |
| unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX], |
| unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX], |
| unsigned int BytePerPixelY[], |
| unsigned int BytePerPixelC[], |
| unsigned int HTotal[], |
| double PixelClock[], |
| double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX], |
| double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX], |
| double MetaRowBytes[][2][DC__NUM_DPP__MAX], |
| bool DynamicMetadataEnable[], |
| double ReadBandwidthLuma[], |
| double ReadBandwidthChroma[], |
| double DCFCLKPerState[], |
| /* Output */ |
| double DCFCLKState[][2]) |
| { |
| unsigned int i, j, k; |
| unsigned int dummy1; |
| double dummy2, dummy3; |
| double NormalEfficiency; |
| double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2]; |
| |
| NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0; |
| for (i = 0; i < DC__VOLTAGE_STATES; ++i) { |
| for (j = 0; j <= 1; ++j) { |
| double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX]; |
| double PrefetchPixelLinesTime[DC__NUM_DPP__MAX]; |
| double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX]; |
| double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX]; |
| double MinimumTWait = 0.0; |
| double DPTEBandwidth; |
| double DCFCLKRequiredForAverageBandwidth; |
| unsigned int ExtraLatencyBytes; |
| double ExtraLatencyCycles; |
| double DCFCLKRequiredForPeakBandwidth; |
| unsigned int NoOfDPPState[DC__NUM_DPP__MAX]; |
| double MinimumTvmPlus2Tr0; |
| |
| TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0; |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j] |
| + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k] |
| / (15.75 * HTotal[k] / PixelClock[k]); |
| } |
| |
| for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) |
| NoOfDPPState[k] = NoOfDPP[i][j][k]; |
| |
| DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]; |
| DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth); |
| |
| ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes, |
| TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte, |
| TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable, |
| NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize, |
| HostVMMaxNonCachedPageTableLevels); |
| ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ |
| + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth; |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| double DCFCLKCyclesRequiredInPrefetch; |
| double PrefetchTime; |
| |
| PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k] |
| * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k] |
| + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k] |
| * BytePerPixelC[k]) / NormalEfficiency |
| / ReturnBusWidth; |
| DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] |
| + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency |
| / NormalEfficiency / ReturnBusWidth |
| * (GPUVMMaxPageTableLevels > 2 ? 1 : 0) |
| + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency |
| / ReturnBusWidth |
| + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth |
| + PixelDCFCLKCyclesRequiredInPrefetch[k]; |
| PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k]) |
| * HTotal[k] / PixelClock[k]; |
| DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true && |
| DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ? |
| UrgLatency[i] * GPUVMMaxPageTableLevels * |
| (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0; |
| |
| MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode, |
| UseMALLForPStateChange[k], |
| SynchronizeDRRDisplaysForUCLKPStateChangeFinal, |
| DRRDisplay[k], |
| DRAMClockChangeLatencyFinal, |
| FCLKChangeLatency, |
| UrgLatency[i], |
| SREnterPlusExitTime); |
| |
| PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] - |
| MinimumTWait - UrgLatency[i] * |
| ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels : |
| GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ? |
| HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - |
| DynamicMetadataVMExtraLatency[k]; |
| |
| if (PrefetchTime > 0) { |
| double ExpectedVRatioPrefetch; |
| |
| ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime * |
| PixelDCFCLKCyclesRequiredInPrefetch[k] / |
| DCFCLKCyclesRequiredInPrefetch); |
| DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] * |
| PixelDCFCLKCyclesRequiredInPrefetch[k] / |
| PrefetchPixelLinesTime[k] * |
| dml_max(1.0, ExpectedVRatioPrefetch) * |
| dml_max(1.0, ExpectedVRatioPrefetch / 4); |
| if (HostVMEnable == true || ImmediateFlipRequirement == true) { |
| DCFCLKRequiredForPeakBandwidthPerSurface[k] = |
| DCFCLKRequiredForPeakBandwidthPerSurface[k] + |
| NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / |
| NormalEfficiency / ReturnBusWidth; |
| } |
| } else { |
| DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i]; |
| } |
| if (DynamicMetadataEnable[k] == true) { |
| double TSetupPipe; |
| double TdmbfPipe; |
| double TdmsksPipe; |
| double TdmecPipe; |
| double AllowedTimeForUrgentExtraLatency; |
| |
| dml32_CalculateVUpdateAndDynamicMetadataParameters( |
| MaxInterDCNTileRepeaters, |
| RequiredDPPCLKPerSurface[i][j][k], |
| RequiredDISPCLK[i][j], |
| ProjectedDCFClkDeepSleep[i][j], |
| PixelClock[k], |
| HTotal[k], |
| VTotal[k] - VActive[k], |
| DynamicMetadataTransmittedBytes[k], |
| DynamicMetadataLinesBeforeActiveRequired[k], |
| Interlace[k], |
| ProgressiveToInterlaceUnitInOPP, |
| |
| /* output */ |
| &TSetupPipe, |
| &TdmbfPipe, |
| &TdmecPipe, |
| &TdmsksPipe, |
| &dummy1, |
| &dummy2, |
| &dummy3); |
| AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] / |
| PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - |
| TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k]; |
| if (AllowedTimeForUrgentExtraLatency > 0) |
| DCFCLKRequiredForPeakBandwidthPerSurface[k] = |
| dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k], |
| ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency); |
| else |
| DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i]; |
| } |
| } |
| DCFCLKRequiredForPeakBandwidth = 0; |
| for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) { |
| DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + |
| DCFCLKRequiredForPeakBandwidthPerSurface[k]; |
| } |
| MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ? |
| (HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) * |
| (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0); |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| double MaximumTvmPlus2Tr0PlusTsw; |
| |
| MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] / |
| PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k]; |
| if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) { |
| DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i]; |
| } else { |
| DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth, |
| 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - |
| MinimumTvmPlus2Tr0 - |
| PrefetchPixelLinesTime[k] / 4), |
| (2 * ExtraLatencyCycles + |
| PixelDCFCLKCyclesRequiredInPrefetch[k]) / |
| (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0)); |
| } |
| } |
| DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 * |
| dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth)); |
| } |
| } |
| } |
| |
| unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes, |
| unsigned int TotalNumberOfActiveDPP, |
| unsigned int PixelChunkSizeInKByte, |
| unsigned int TotalNumberOfDCCActiveDPP, |
| unsigned int MetaChunkSize, |
| bool GPUVMEnable, |
| bool HostVMEnable, |
| unsigned int NumberOfActiveSurfaces, |
| unsigned int NumberOfDPP[], |
| unsigned int dpte_group_bytes[], |
| double HostVMInefficiencyFactor, |
| double HostVMMinPageSize, |
| unsigned int HostVMMaxNonCachedPageTableLevels) |
| { |
| unsigned int k; |
| double ret; |
| unsigned int HostVMDynamicLevels; |
| |
| if (GPUVMEnable == true && HostVMEnable == true) { |
| if (HostVMMinPageSize < 2048) |
| HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; |
| else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) |
| HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); |
| else |
| HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); |
| } else { |
| HostVMDynamicLevels = 0; |
| } |
| |
| ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + |
| TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0; |
| |
| if (GPUVMEnable == true) { |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * |
| (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor; |
| } |
| } |
| return ret; |
| } |
| |
| void dml32_CalculateVUpdateAndDynamicMetadataParameters( |
| unsigned int MaxInterDCNTileRepeaters, |
| double Dppclk, |
| double Dispclk, |
| double DCFClkDeepSleep, |
| double PixelClock, |
| unsigned int HTotal, |
| unsigned int VBlank, |
| unsigned int DynamicMetadataTransmittedBytes, |
| unsigned int DynamicMetadataLinesBeforeActiveRequired, |
| unsigned int InterlaceEnable, |
| bool ProgressiveToInterlaceUnitInOPP, |
| |
| /* output */ |
| double *TSetup, |
| double *Tdmbf, |
| double *Tdmec, |
| double *Tdmsks, |
| unsigned int *VUpdateOffsetPix, |
| double *VUpdateWidthPix, |
| double *VReadyOffsetPix) |
| { |
| double TotalRepeaterDelayTime; |
| |
| TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk); |
| *VUpdateWidthPix = |
| dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0); |
| *VReadyOffsetPix = dml_ceil(dml_max(150.0 / Dppclk, |
| TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0); |
| *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0); |
| *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; |
| *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk; |
| *Tdmec = HTotal / PixelClock; |
| |
| if (DynamicMetadataLinesBeforeActiveRequired == 0) |
| *Tdmsks = VBlank * HTotal / PixelClock / 2.0; |
| else |
| *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock; |
| |
| if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) |
| *Tdmsks = *Tdmsks / 2; |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: VUpdateWidthPix = %d\n", __func__, *VUpdateWidthPix); |
| dml_print("DML::%s: VReadyOffsetPix = %d\n", __func__, *VReadyOffsetPix); |
| dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix); |
| |
| dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n", |
| __func__, DynamicMetadataLinesBeforeActiveRequired); |
| dml_print("DML::%s: VBlank = %d\n", __func__, VBlank); |
| dml_print("DML::%s: HTotal = %d\n", __func__, HTotal); |
| dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock); |
| dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks); |
| #endif |
| } |
| |
| double dml32_CalculateTWait( |
| unsigned int PrefetchMode, |
| enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange, |
| bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal, |
| bool DRRDisplay, |
| double DRAMClockChangeLatency, |
| double FCLKChangeLatency, |
| double UrgentLatency, |
| double SREnterPlusExitTime) |
| { |
| double TWait = 0.0; |
| |
| if (PrefetchMode == 0 && |
| !(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) && |
| !(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) && |
| !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) && |
| !(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) { |
| TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency); |
| } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) { |
| TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency); |
| } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) { |
| TWait = dml_max(SREnterPlusExitTime, UrgentLatency); |
| } else { |
| TWait = UrgentLatency; |
| } |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode); |
| dml_print("DML::%s: TWait = %f\n", __func__, TWait); |
| #endif |
| return TWait; |
| } // CalculateTWait |
| |
| // Function: get_return_bw_mbps |
| // Megabyte per second |
| double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc, |
| const int VoltageLevel, |
| const bool HostVMEnable, |
| const double DCFCLK, |
| const double FabricClock, |
| const double DRAMSpeed) |
| { |
| double ReturnBW = 0.; |
| double IdealSDPPortBandwidth = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK; |
| double IdealFabricBandwidth = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes; |
| double IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes; |
| double PixelDataOnlyReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100, |
| IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100, |
| IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe : |
| soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100); |
| double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100, |
| IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100, |
| IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe : |
| soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100); |
| |
| if (HostVMEnable != true) |
| ReturnBW = PixelDataOnlyReturnBW; |
| else |
| ReturnBW = PixelMixedWithVMDataReturnBW; |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel); |
| dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable); |
| dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); |
| dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock); |
| dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed); |
| dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, IdealSDPPortBandwidth); |
| dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, IdealFabricBandwidth); |
| dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth); |
| dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW); |
| dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW); |
| dml_print("DML::%s: ReturnBW = %f MBps\n", __func__, ReturnBW); |
| #endif |
| return ReturnBW; |
| } |
| |
| // Function: get_return_bw_mbps_vm_only |
| // Megabyte per second |
| double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc, |
| const int VoltageLevel, |
| const double DCFCLK, |
| const double FabricClock, |
| const double DRAMSpeed) |
| { |
| double VMDataOnlyReturnBW = dml_min3( |
| soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0, |
| FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes |
| * soc->pct_ideal_sdp_bw_after_urgent / 100.0, |
| DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes |
| * (VoltageLevel < 2 ? |
| soc->pct_ideal_dram_bw_after_urgent_strobe : |
| soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0); |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel); |
| dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); |
| dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock); |
| dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed); |
| dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW); |
| #endif |
| return VMDataOnlyReturnBW; |
| } |
| |
| double dml32_CalculateExtraLatency( |
| unsigned int RoundTripPingLatencyCycles, |
| unsigned int ReorderingBytes, |
| double DCFCLK, |
| unsigned int TotalNumberOfActiveDPP, |
| unsigned int PixelChunkSizeInKByte, |
| unsigned int TotalNumberOfDCCActiveDPP, |
| unsigned int MetaChunkSize, |
| double ReturnBW, |
| bool GPUVMEnable, |
| bool HostVMEnable, |
| unsigned int NumberOfActiveSurfaces, |
| unsigned int NumberOfDPP[], |
| unsigned int dpte_group_bytes[], |
| double HostVMInefficiencyFactor, |
| double HostVMMinPageSize, |
| unsigned int HostVMMaxNonCachedPageTableLevels) |
| { |
| double ExtraLatencyBytes; |
| double ExtraLatency; |
| |
| ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes( |
| ReorderingBytes, |
| TotalNumberOfActiveDPP, |
| PixelChunkSizeInKByte, |
| TotalNumberOfDCCActiveDPP, |
| MetaChunkSize, |
| GPUVMEnable, |
| HostVMEnable, |
| NumberOfActiveSurfaces, |
| NumberOfDPP, |
| dpte_group_bytes, |
| HostVMInefficiencyFactor, |
| HostVMMinPageSize, |
| HostVMMaxNonCachedPageTableLevels); |
| |
| ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW; |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles); |
| dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); |
| dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes); |
| dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); |
| dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency); |
| #endif |
| |
| return ExtraLatency; |
| } // CalculateExtraLatency |
| |
| bool dml32_CalculatePrefetchSchedule( |
| struct vba_vars_st *v, |
| unsigned int k, |
| double HostVMInefficiencyFactor, |
| DmlPipe *myPipe, |
| unsigned int DSCDelay, |
| unsigned int DPP_RECOUT_WIDTH, |
| unsigned int VStartup, |
| unsigned int MaxVStartup, |
| double UrgentLatency, |
| double UrgentExtraLatency, |
| double TCalc, |
| unsigned int PDEAndMetaPTEBytesFrame, |
| unsigned int MetaRowByte, |
| unsigned int PixelPTEBytesPerRow, |
| double PrefetchSourceLinesY, |
| unsigned int SwathWidthY, |
| unsigned int VInitPreFillY, |
| unsigned int MaxNumSwathY, |
| double PrefetchSourceLinesC, |
| unsigned int SwathWidthC, |
| unsigned int VInitPreFillC, |
| unsigned int MaxNumSwathC, |
| unsigned int swath_width_luma_ub, |
| unsigned int swath_width_chroma_ub, |
| unsigned int SwathHeightY, |
| unsigned int SwathHeightC, |
| double TWait, |
| double TPreReq, |
| bool ExtendPrefetchIfPossible, |
| /* Output */ |
| double *DSTXAfterScaler, |
| double *DSTYAfterScaler, |
| double *DestinationLinesForPrefetch, |
| double *PrefetchBandwidth, |
| double *DestinationLinesToRequestVMInVBlank, |
| double *DestinationLinesToRequestRowInVBlank, |
| double *VRatioPrefetchY, |
| double *VRatioPrefetchC, |
| double *RequiredPrefetchPixDataBWLuma, |
| double *RequiredPrefetchPixDataBWChroma, |
| bool *NotEnoughTimeForDynamicMetadata, |
| double *Tno_bw, |
| double *prefetch_vmrow_bw, |
| double *Tdmdl_vm, |
| double *Tdmdl, |
| double *TSetup, |
| unsigned int *VUpdateOffsetPix, |
| double *VUpdateWidthPix, |
| double *VReadyOffsetPix) |
| { |
| double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater; |
| bool MyError = false; |
| unsigned int DPPCycles, DISPCLKCycles; |
| double DSTTotalPixelsAfterScaler; |
| double LineTime; |
| double dst_y_prefetch_equ; |
| double prefetch_bw_oto; |
| double Tvm_oto; |
| double Tr0_oto; |
| double Tvm_oto_lines; |
| double Tr0_oto_lines; |
| double dst_y_prefetch_oto; |
| double TimeForFetchingMetaPTE = 0; |
| double TimeForFetchingRowInVBlank = 0; |
| double LinesToRequestPrefetchPixelData = 0; |
| double LinesForPrefetchBandwidth = 0; |
| unsigned int HostVMDynamicLevelsTrips; |
| double trip_to_mem; |
| double Tvm_trips; |
| double Tr0_trips; |
| double Tvm_trips_rounded; |
| double Tr0_trips_rounded; |
| double Lsw_oto; |
| double Tpre_rounded; |
| double prefetch_bw_equ; |
| double Tvm_equ; |
| double Tr0_equ; |
| double Tdmbf; |
| double Tdmec; |
| double Tdmsks; |
| double prefetch_sw_bytes; |
| double bytes_pp; |
| double dep_bytes; |
| unsigned int max_vratio_pre = v->MaxVRatioPre; |
| double min_Lsw; |
| double Tsw_est1 = 0; |
| double Tsw_est3 = 0; |
| |
| if (v->GPUVMEnable == true && v->HostVMEnable == true) |
| HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels; |
| else |
| HostVMDynamicLevelsTrips = 0; |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: v->GPUVMEnable = %d\n", __func__, v->GPUVMEnable); |
| dml_print("DML::%s: v->GPUVMMaxPageTableLevels = %d\n", __func__, v->GPUVMMaxPageTableLevels); |
| dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable); |
| dml_print("DML::%s: v->HostVMEnable=%d HostVMInefficiencyFactor=%f\n", |
| __func__, v->HostVMEnable, HostVMInefficiencyFactor); |
| #endif |
| dml32_CalculateVUpdateAndDynamicMetadataParameters( |
| v->MaxInterDCNTileRepeaters, |
| myPipe->Dppclk, |
| myPipe->Dispclk, |
| myPipe->DCFClkDeepSleep, |
| myPipe->PixelClock, |
| myPipe->HTotal, |
| myPipe->VBlank, |
| v->DynamicMetadataTransmittedBytes[k], |
| v->DynamicMetadataLinesBeforeActiveRequired[k], |
| myPipe->InterlaceEnable, |
| myPipe->ProgressiveToInterlaceUnitInOPP, |
| TSetup, |
| |
| /* output */ |
| &Tdmbf, |
| &Tdmec, |
| &Tdmsks, |
| VUpdateOffsetPix, |
| VUpdateWidthPix, |
| VReadyOffsetPix); |
| |
| LineTime = myPipe->HTotal / myPipe->PixelClock; |
| trip_to_mem = UrgentLatency; |
| Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); |
| |
| if (v->DynamicMetadataVMEnabled == true) |
| *Tdmdl = TWait + Tvm_trips + trip_to_mem; |
| else |
| *Tdmdl = TWait + UrgentExtraLatency; |
| |
| #ifdef __DML_VBA_ALLOW_DELTA__ |
| if (v->DynamicMetadataEnable[k] == false) |
| *Tdmdl = 0.0; |
| #endif |
| |
| if (v->DynamicMetadataEnable[k] == true) { |
| if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { |
| *NotEnoughTimeForDynamicMetadata = true; |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); |
| dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", |
| __func__, Tdmbf); |
| dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); |
| dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", |
| __func__, Tdmsks); |
| dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", |
| __func__, *Tdmdl); |
| #endif |
| } else { |
| *NotEnoughTimeForDynamicMetadata = false; |
| } |
| } else { |
| *NotEnoughTimeForDynamicMetadata = false; |
| } |
| |
| *Tdmdl_vm = (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true && |
| v->GPUVMEnable == true ? TWait + Tvm_trips : 0); |
| |
| if (myPipe->ScalerEnabled) |
| DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL; |
| else |
| DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly; |
| |
| DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor; |
| |
| DISPCLKCycles = v->DISPCLKDelaySubtotal; |
| |
| if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0) |
| return true; |
| |
| *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles * |
| myPipe->PixelClock / myPipe->Dispclk + DSCDelay; |
| |
| *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0) |
| + (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH |
| + ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ? |
| myPipe->HActive / 2 : 0) |
| + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0); |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles); |
| dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock); |
| dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk); |
| dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles); |
| dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk); |
| dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay); |
| dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode); |
| dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH); |
| dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler); |
| #endif |
| |
| if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP)) |
| *DSTYAfterScaler = 1; |
| else |
| *DSTYAfterScaler = 0; |
| |
| DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; |
| *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); |
| *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler); |
| dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler); |
| #endif |
| |
| MyError = false; |
| |
| Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); |
| |
| if (v->GPUVMEnable == true) { |
| Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime; |
| Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; |
| if (v->GPUVMMaxPageTableLevels >= 3) { |
| *Tno_bw = UrgentExtraLatency + trip_to_mem * |
| (double) ((v->GPUVMMaxPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1); |
| } else if (v->GPUVMMaxPageTableLevels == 1 && myPipe->DCCEnable != true) { |
| Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) / |
| 4.0 * LineTime; // VBA_ERROR |
| *Tno_bw = UrgentExtraLatency; |
| } else { |
| *Tno_bw = 0; |
| } |
| } else if (myPipe->DCCEnable == true) { |
| Tvm_trips_rounded = LineTime / 4.0; |
| Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; |
| *Tno_bw = 0; |
| } else { |
| Tvm_trips_rounded = LineTime / 4.0; |
| Tr0_trips_rounded = LineTime / 2.0; |
| *Tno_bw = 0; |
| } |
| Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0); |
| Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0); |
| |
| if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 |
| || myPipe->SourcePixelFormat == dm_420_12) { |
| bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; |
| } else { |
| bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; |
| } |
| |
| prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY |
| + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; |
| prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface, |
| prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime)); |
| |
| min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre; |
| min_Lsw = dml_max(min_Lsw, 1.0); |
| Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0; |
| |
| if (v->GPUVMEnable == true) { |
| Tvm_oto = dml_max3( |
| Tvm_trips, |
| *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, |
| LineTime / 4.0); |
| } else |
| Tvm_oto = LineTime / 4.0; |
| |
| if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) { |
| Tr0_oto = dml_max4( |
| Tr0_trips, |
| (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, |
| (LineTime - Tvm_oto)/2.0, |
| LineTime / 4.0); |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__, |
| (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto); |
| dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips); |
| dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto); |
| dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4); |
| #endif |
| } else |
| Tr0_oto = (LineTime - Tvm_oto) / 2.0; |
| |
| Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0; |
| Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; |
| dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; |
| |
| dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - |
| (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal); |
| |
| dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, __DML_VBA_MAX_DST_Y_PRE__); |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal); |
| dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw); |
| dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw); |
| dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency); |
| dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem); |
| dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); |
| dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); |
| dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); |
| dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC); |
| dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); |
| dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub); |
| dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes); |
| dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp); |
| dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); |
| dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); |
| dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); |
| dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); |
| dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips); |
| dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips); |
| dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto); |
| dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto); |
| dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto); |
| dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines); |
| dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines); |
| dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto); |
| dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto); |
| dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ); |
| #endif |
| |
| dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; |
| Tpre_rounded = dst_y_prefetch_equ * LineTime; |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ); |
| dml_print("DML::%s: LineTime: %f\n", __func__, LineTime); |
| dml_print("DML::%s: VStartup: %d\n", __func__, VStartup); |
| dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", |
| __func__, VStartup * LineTime); |
| dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup); |
| dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc); |
| dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf); |
| dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); |
| dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm); |
| dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl); |
| dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n", |
| __func__, *DSTYAfterScaler); |
| #endif |
| dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, |
| MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor); |
| |
| if (prefetch_sw_bytes < dep_bytes) |
| prefetch_sw_bytes = 2 * dep_bytes; |
| |
| *PrefetchBandwidth = 0; |
| *DestinationLinesToRequestVMInVBlank = 0; |
| *DestinationLinesToRequestRowInVBlank = 0; |
| *VRatioPrefetchY = 0; |
| *VRatioPrefetchC = 0; |
| *RequiredPrefetchPixDataBWLuma = 0; |
| if (dst_y_prefetch_equ > 1 && |
| (Tpre_rounded >= TPreReq || dst_y_prefetch_equ == __DML_VBA_MAX_DST_Y_PRE__)) { |
| double PrefetchBandwidth1; |
| double PrefetchBandwidth2; |
| double PrefetchBandwidth3; |
| double PrefetchBandwidth4; |
| |
| if (Tpre_rounded - *Tno_bw > 0) { |
| PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte |
| + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor |
| + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw); |
| Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1; |
| } else |
| PrefetchBandwidth1 = 0; |
| |
| if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw) |
| && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) { |
| PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte |
| + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) |
| / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw); |
| } |
| |
| if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) |
| PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / |
| (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); |
| else |
| PrefetchBandwidth2 = 0; |
| |
| if (Tpre_rounded - Tvm_trips_rounded > 0) { |
| PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor |
| + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded); |
| Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3; |
| } else |
| PrefetchBandwidth3 = 0; |
| |
| |
| if (VStartup == MaxVStartup && |
| (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 * |
| LineTime - Tvm_trips_rounded > 0) { |
| PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) |
| / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded); |
| } |
| |
| if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) { |
| PrefetchBandwidth4 = prefetch_sw_bytes / |
| (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); |
| } else { |
| PrefetchBandwidth4 = 0; |
| } |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded); |
| dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw); |
| dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded); |
| dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1); |
| dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3); |
| dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1); |
| dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2); |
| dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3); |
| dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4); |
| #endif |
| { |
| bool Case1OK; |
| bool Case2OK; |
| bool Case3OK; |
| |
| if (PrefetchBandwidth1 > 0) { |
| if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 |
| >= Tvm_trips_rounded |
| && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) |
| / PrefetchBandwidth1 >= Tr0_trips_rounded) { |
| Case1OK = true; |
| } else { |
| Case1OK = false; |
| } |
| } else { |
| Case1OK = false; |
| } |
| |
| if (PrefetchBandwidth2 > 0) { |
| if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 |
| >= Tvm_trips_rounded |
| && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) |
| / PrefetchBandwidth2 < Tr0_trips_rounded) { |
| Case2OK = true; |
| } else { |
| Case2OK = false; |
| } |
| } else { |
| Case2OK = false; |
| } |
| |
| if (PrefetchBandwidth3 > 0) { |
| if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < |
| Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * |
| HostVMInefficiencyFactor) / PrefetchBandwidth3 >= |
| Tr0_trips_rounded) { |
| Case3OK = true; |
| } else { |
| Case3OK = false; |
| } |
| } else { |
| Case3OK = false; |
| } |
| |
| if (Case1OK) |
| prefetch_bw_equ = PrefetchBandwidth1; |
| else if (Case2OK) |
| prefetch_bw_equ = PrefetchBandwidth2; |
| else if (Case3OK) |
| prefetch_bw_equ = PrefetchBandwidth3; |
| else |
| prefetch_bw_equ = PrefetchBandwidth4; |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK); |
| dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK); |
| dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK); |
| dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ); |
| #endif |
| |
| if (prefetch_bw_equ > 0) { |
| if (v->GPUVMEnable == true) { |
| Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * |
| HostVMInefficiencyFactor / prefetch_bw_equ, |
| Tvm_trips, LineTime / 4); |
| } else { |
| Tvm_equ = LineTime / 4; |
| } |
| |
| if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) { |
| Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow * |
| HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips, |
| (LineTime - Tvm_equ) / 2, LineTime / 4); |
| } else { |
| Tr0_equ = (LineTime - Tvm_equ) / 2; |
| } |
| } else { |
| Tvm_equ = 0; |
| Tr0_equ = 0; |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__); |
| #endif |
| } |
| } |
| |
| if (dst_y_prefetch_oto < dst_y_prefetch_equ) { |
| if (dst_y_prefetch_oto * LineTime < TPreReq) { |
| *DestinationLinesForPrefetch = dst_y_prefetch_equ; |
| } else { |
| *DestinationLinesForPrefetch = dst_y_prefetch_oto; |
| } |
| TimeForFetchingMetaPTE = Tvm_oto; |
| TimeForFetchingRowInVBlank = Tr0_oto; |
| *PrefetchBandwidth = prefetch_bw_oto; |
| /* Clamp to oto for bandwidth calculation */ |
| LinesForPrefetchBandwidth = dst_y_prefetch_oto; |
| } else { |
| /* For mode programming we want to extend the prefetch as much as possible |
| * (up to oto, or as long as we can for equ) if we're not already applying |
| * the 60us prefetch requirement. This is to avoid intermittent underflow |
| * issues during prefetch. |
| * |
| * The prefetch extension is applied under the following scenarios: |
| * 1. We're in prefetch mode > 0 (i.e. we don't support MCLK switch in blank) |
| * 2. We're using subvp or drr methods of p-state switch, in which case we |
| * we don't care if prefetch takes up more of the blanking time |
| * |
| * Mode programming typically chooses the smallest prefetch time possible |
| * (i.e. highest bandwidth during prefetch) presumably to create margin between |
| * p-states / c-states that happen in vblank and prefetch. Therefore we only |
| * apply this prefetch extension when p-state in vblank is not required (UCLK |
| * p-states take up the most vblank time). |
| */ |
| if (ExtendPrefetchIfPossible && TPreReq == 0 && VStartup < MaxVStartup) { |
| MyError = true; |
| } else { |
| *DestinationLinesForPrefetch = dst_y_prefetch_equ; |
| TimeForFetchingMetaPTE = Tvm_equ; |
| TimeForFetchingRowInVBlank = Tr0_equ; |
| *PrefetchBandwidth = prefetch_bw_equ; |
| /* Clamp to equ for bandwidth calculation */ |
| LinesForPrefetchBandwidth = dst_y_prefetch_equ; |
| } |
| } |
| |
| *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; |
| |
| *DestinationLinesToRequestRowInVBlank = |
| dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; |
| |
| LinesToRequestPrefetchPixelData = LinesForPrefetchBandwidth - |
| *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank; |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch); |
| dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", |
| __func__, *DestinationLinesToRequestVMInVBlank); |
| dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank); |
| dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); |
| dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", |
| __func__, *DestinationLinesToRequestRowInVBlank); |
| dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); |
| dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData); |
| #endif |
| |
| if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) { |
| *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData; |
| *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); |
| dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY); |
| dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY); |
| #endif |
| if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { |
| if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { |
| *VRatioPrefetchY = |
| dml_max((double) PrefetchSourceLinesY / |
| LinesToRequestPrefetchPixelData, |
| (double) MaxNumSwathY * SwathHeightY / |
| (LinesToRequestPrefetchPixelData - |
| (VInitPreFillY - 3.0) / 2.0)); |
| *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); |
| } else { |
| MyError = true; |
| *VRatioPrefetchY = 0; |
| } |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); |
| dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); |
| dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY); |
| #endif |
| } |
| |
| *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData; |
| *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); |
| dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC); |
| dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC); |
| #endif |
| if ((SwathHeightC > 4)) { |
| if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { |
| *VRatioPrefetchC = |
| dml_max(*VRatioPrefetchC, |
| (double) MaxNumSwathC * SwathHeightC / |
| (LinesToRequestPrefetchPixelData - |
| (VInitPreFillC - 3.0) / 2.0)); |
| *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); |
| } else { |
| MyError = true; |
| *VRatioPrefetchC = 0; |
| } |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); |
| dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); |
| dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC); |
| #endif |
| } |
| |
| *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY |
| / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub |
| / LineTime; |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); |
| dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); |
| dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); |
| dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", |
| __func__, *RequiredPrefetchPixDataBWLuma); |
| #endif |
| *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / |
| LinesToRequestPrefetchPixelData |
| * myPipe->BytePerPixelC |
| * swath_width_chroma_ub / LineTime; |
| } else { |
| MyError = true; |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n", |
| __func__, LinesToRequestPrefetchPixelData); |
| #endif |
| *VRatioPrefetchY = 0; |
| *VRatioPrefetchC = 0; |
| *RequiredPrefetchPixDataBWLuma = 0; |
| *RequiredPrefetchPixDataBWChroma = 0; |
| } |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", |
| (double)LinesToRequestPrefetchPixelData * LineTime + |
| 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE); |
| dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE); |
| dml_print("DML: To: %fus - time for propagation from scaler to optc\n", |
| (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime); |
| dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); |
| dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - |
| TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler + |
| ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup); |
| dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", |
| PixelPTEBytesPerRow); |
| #endif |
| } else { |
| MyError = true; |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", |
| __func__, dst_y_prefetch_equ); |
| #endif |
| } |
| |
| { |
| double prefetch_vm_bw; |
| double prefetch_row_bw; |
| |
| if (PDEAndMetaPTEBytesFrame == 0) { |
| prefetch_vm_bw = 0; |
| } else if (*DestinationLinesToRequestVMInVBlank > 0) { |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); |
| dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); |
| dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", |
| __func__, *DestinationLinesToRequestVMInVBlank); |
| dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); |
| #endif |
| prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / |
| (*DestinationLinesToRequestVMInVBlank * LineTime); |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); |
| #endif |
| } else { |
| prefetch_vm_bw = 0; |
| MyError = true; |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n", |
| __func__, *DestinationLinesToRequestVMInVBlank); |
| #endif |
| } |
| |
| if (MetaRowByte + PixelPTEBytesPerRow == 0) { |
| prefetch_row_bw = 0; |
| } else if (*DestinationLinesToRequestRowInVBlank > 0) { |
| prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / |
| (*DestinationLinesToRequestRowInVBlank * LineTime); |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); |
| dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); |
| dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", |
| __func__, *DestinationLinesToRequestRowInVBlank); |
| dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); |
| #endif |
| } else { |
| prefetch_row_bw = 0; |
| MyError = true; |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n", |
| __func__, *DestinationLinesToRequestRowInVBlank); |
| #endif |
| } |
| |
| *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); |
| } |
| |
| if (MyError) { |
| *PrefetchBandwidth = 0; |
| TimeForFetchingMetaPTE = 0; |
| TimeForFetchingRowInVBlank = 0; |
| *DestinationLinesToRequestVMInVBlank = 0; |
| *DestinationLinesToRequestRowInVBlank = 0; |
| *DestinationLinesForPrefetch = 0; |
| LinesToRequestPrefetchPixelData = 0; |
| *VRatioPrefetchY = 0; |
| *VRatioPrefetchC = 0; |
| *RequiredPrefetchPixDataBWLuma = 0; |
| *RequiredPrefetchPixDataBWChroma = 0; |
| } |
| |
| return MyError; |
| } // CalculatePrefetchSchedule |
| |
| void dml32_CalculateFlipSchedule( |
| double HostVMInefficiencyFactor, |
| double UrgentExtraLatency, |
| double UrgentLatency, |
| unsigned int GPUVMMaxPageTableLevels, |
| bool HostVMEnable, |
| unsigned int HostVMMaxNonCachedPageTableLevels, |
| bool GPUVMEnable, |
| double HostVMMinPageSize, |
| double PDEAndMetaPTEBytesPerFrame, |
| double MetaRowBytes, |
| double DPTEBytesPerRow, |
| double BandwidthAvailableForImmediateFlip, |
| unsigned int TotImmediateFlipBytes, |
| enum source_format_class SourcePixelFormat, |
| double LineTime, |
| double VRatio, |
| double VRatioChroma, |
| double Tno_bw, |
| bool DCCEnable, |
| unsigned int dpte_row_height, |
| unsigned int meta_row_height, |
| unsigned int dpte_row_height_chroma, |
| unsigned int meta_row_height_chroma, |
| bool use_one_row_for_frame_flip, |
| |
| /* Output */ |
| double *DestinationLinesToRequestVMInImmediateFlip, |
| double *DestinationLinesToRequestRowInImmediateFlip, |
| double *final_flip_bw, |
| bool *ImmediateFlipSupportedForPipe) |
| { |
| double min_row_time = 0.0; |
| unsigned int HostVMDynamicLevelsTrips; |
| double TimeForFetchingMetaPTEImmediateFlip; |
| double TimeForFetchingRowInVBlankImmediateFlip; |
| double ImmediateFlipBW = 1.0; |
| |
| if (GPUVMEnable == true && HostVMEnable == true) |
| HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; |
| else |
| HostVMDynamicLevelsTrips = 0; |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes); |
| dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); |
| #endif |
| |
| if (TotImmediateFlipBytes > 0) { |
| if (use_one_row_for_frame_flip) { |
| ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) * |
| BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes; |
| } else { |
| ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * |
| BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes; |
| } |
| if (GPUVMEnable == true) { |
| TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame * |
| HostVMInefficiencyFactor / ImmediateFlipBW, |
| UrgentExtraLatency + UrgentLatency * |
| (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), |
| LineTime / 4.0); |
| } else { |
| TimeForFetchingMetaPTEImmediateFlip = 0; |
| } |
| if ((GPUVMEnable == true || DCCEnable == true)) { |
| TimeForFetchingRowInVBlankImmediateFlip = dml_max3( |
| (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, |
| UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0); |
| } else { |
| TimeForFetchingRowInVBlankImmediateFlip = 0; |
| } |
| |
| *DestinationLinesToRequestVMInImmediateFlip = |
| dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0; |
| *DestinationLinesToRequestRowInImmediateFlip = |
| dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0; |
| |
| if (GPUVMEnable == true) { |
| *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / |
| (*DestinationLinesToRequestVMInImmediateFlip * LineTime), |
| (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / |
| (*DestinationLinesToRequestRowInImmediateFlip * LineTime)); |
| } else if ((GPUVMEnable == true || DCCEnable == true)) { |
| *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / |
| (*DestinationLinesToRequestRowInImmediateFlip * LineTime); |
| } else { |
| *final_flip_bw = 0; |
| } |
| } else { |
| TimeForFetchingMetaPTEImmediateFlip = 0; |
| TimeForFetchingRowInVBlankImmediateFlip = 0; |
| *DestinationLinesToRequestVMInImmediateFlip = 0; |
| *DestinationLinesToRequestRowInImmediateFlip = 0; |
| *final_flip_bw = 0; |
| } |
| |
| if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) { |
| if (GPUVMEnable == true && DCCEnable != true) { |
| min_row_time = dml_min(dpte_row_height * |
| LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma); |
| } else if (GPUVMEnable != true && DCCEnable == true) { |
| min_row_time = dml_min(meta_row_height * |
| LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma); |
| } else { |
| min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height * |
| LineTime / VRatio, dpte_row_height_chroma * LineTime / |
| VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma); |
| } |
| } else { |
| if (GPUVMEnable == true && DCCEnable != true) { |
| min_row_time = dpte_row_height * LineTime / VRatio; |
| } else if (GPUVMEnable != true && DCCEnable == true) { |
| min_row_time = meta_row_height * LineTime / VRatio; |
| } else { |
| min_row_time = |
| dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio); |
| } |
| } |
| |
| if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16 |
| || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip |
| > min_row_time) { |
| *ImmediateFlipSupportedForPipe = false; |
| } else { |
| *ImmediateFlipSupportedForPipe = true; |
| } |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); |
| dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable); |
| dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", |
| __func__, *DestinationLinesToRequestVMInImmediateFlip); |
| dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", |
| __func__, *DestinationLinesToRequestRowInImmediateFlip); |
| dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip); |
| dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", |
| __func__, TimeForFetchingRowInVBlankImmediateFlip); |
| dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time); |
| dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe); |
| #endif |
| } // CalculateFlipSchedule |
| |
| void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( |
| struct vba_vars_st *v, |
| unsigned int PrefetchMode, |
| double DCFCLK, |
| double ReturnBW, |
| SOCParametersList mmSOCParameters, |
| double SOCCLK, |
| double DCFClkDeepSleep, |
| unsigned int DETBufferSizeY[], |
| unsigned int DETBufferSizeC[], |
| unsigned int SwathHeightY[], |
| unsigned int SwathHeightC[], |
| double SwathWidthY[], |
| double SwathWidthC[], |
| unsigned int DPPPerSurface[], |
| double BytePerPixelDETY[], |
| double BytePerPixelDETC[], |
| double DSTXAfterScaler[], |
| double DSTYAfterScaler[], |
| bool UnboundedRequestEnabled, |
| unsigned int CompressedBufferSizeInkByte, |
| |
| /* Output */ |
| enum clock_change_support *DRAMClockChangeSupport, |
| double MaxActiveDRAMClockChangeLatencySupported[], |
| unsigned int SubViewportLinesNeededInMALL[], |
| enum dm_fclock_change_support *FCLKChangeSupport, |
| double *MinActiveFCLKChangeLatencySupported, |
| bool *USRRetrainingSupport, |
| double ActiveDRAMClockChangeLatencyMargin[]) |
| { |
| unsigned int i, j, k; |
| unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0; |
| unsigned int DRAMClockChangeSupportNumber = 0; |
| unsigned int LastSurfaceWithoutMargin; |
| unsigned int DRAMClockChangeMethod = 0; |
| bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false; |
| double MinActiveFCLKChangeMargin = 0.; |
| double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.; |
| double ActiveClockChangeLatencyHidingY; |
| double ActiveClockChangeLatencyHidingC; |
| double ActiveClockChangeLatencyHiding; |
| double EffectiveDETBufferSizeY; |
| double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX]; |
| double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX]; |
| double TotalPixelBW = 0.0; |
| bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX]; |
| double EffectiveLBLatencyHidingY; |
| double EffectiveLBLatencyHidingC; |
| double LinesInDETY[DC__NUM_DPP__MAX]; |
| double LinesInDETC[DC__NUM_DPP__MAX]; |
| unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; |
| unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX]; |
| double FullDETBufferingTimeY; |
| double FullDETBufferingTimeC; |
| double WritebackDRAMClockChangeLatencyMargin; |
| double WritebackFCLKChangeLatencyMargin; |
| double WritebackLatencyHiding; |
| bool SameTimingForFCLKChange; |
| |
| unsigned int TotalActiveWriteback = 0; |
| unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX]; |
| unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX]; |
| |
| v->Watermark.UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency; |
| v->Watermark.USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency |
| + mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency; |
| v->Watermark.DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + v->Watermark.UrgentWatermark; |
| v->Watermark.FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.UrgentWatermark; |
| v->Watermark.StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency |
| + 10 / DCFClkDeepSleep; |
| v->Watermark.StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency |
| + 10 / DCFClkDeepSleep; |
| v->Watermark.Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency |
| + 10 / DCFClkDeepSleep; |
| v->Watermark.Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time |
| + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep; |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency); |
| dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency); |
| dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency); |
| dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->Watermark.UrgentWatermark); |
| dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, v->Watermark.USRRetrainingWatermark); |
| dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->Watermark.DRAMClockChangeWatermark); |
| dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, v->Watermark.FCLKChangeWatermark); |
| dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, v->Watermark.StutterExitWatermark); |
| dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, v->Watermark.StutterEnterPlusExitWatermark); |
| dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, v->Watermark.Z8StutterExitWatermark); |
| dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", |
| __func__, v->Watermark.Z8StutterEnterPlusExitWatermark); |
| #endif |
| |
| |
| TotalActiveWriteback = 0; |
| for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { |
| if (v->WritebackEnable[k] == true) |
| TotalActiveWriteback = TotalActiveWriteback + 1; |
| } |
| |
| if (TotalActiveWriteback <= 1) { |
| v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency; |
| } else { |
| v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency |
| + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; |
| } |
| if (v->USRRetrainingRequiredFinal) |
| v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark |
| + mmSOCParameters.USRRetrainingLatency; |
| |
| if (TotalActiveWriteback <= 1) { |
| v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency |
| + mmSOCParameters.WritebackLatency; |
| v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency |
| + mmSOCParameters.WritebackLatency; |
| } else { |
| v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency |
| + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; |
| v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency |
| + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024 / 32 / SOCCLK; |
| } |
| |
| if (v->USRRetrainingRequiredFinal) |
| v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark |
| + mmSOCParameters.USRRetrainingLatency; |
| |
| if (v->USRRetrainingRequiredFinal) |
| v->Watermark.WritebackFCLKChangeWatermark = v->Watermark.WritebackFCLKChangeWatermark |
| + mmSOCParameters.USRRetrainingLatency; |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", |
| __func__, v->Watermark.WritebackDRAMClockChangeWatermark); |
| dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, v->Watermark.WritebackFCLKChangeWatermark); |
| dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, v->Watermark.WritebackUrgentWatermark); |
| dml_print("DML::%s: v->USRRetrainingRequiredFinal = %d\n", __func__, v->USRRetrainingRequiredFinal); |
| dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency); |
| #endif |
| |
| for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { |
| TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + |
| SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) / (v->HTotal[k] / v->PixelClock[k]); |
| } |
| |
| for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { |
| |
| LBLatencyHidingSourceLinesY[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1); |
| LBLatencyHidingSourceLinesC[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1); |
| |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%d, v->MaxLineBufferLines = %d\n", __func__, k, v->MaxLineBufferLines); |
| dml_print("DML::%s: k=%d, v->LineBufferSizeFinal = %d\n", __func__, k, v->LineBufferSizeFinal); |
| dml_print("DML::%s: k=%d, v->LBBitPerPixel = %d\n", __func__, k, v->LBBitPerPixel[k]); |
| dml_print("DML::%s: k=%d, v->HRatio = %f\n", __func__, k, v->HRatio[k]); |
| dml_print("DML::%s: k=%d, v->vtaps = %d\n", __func__, k, v->vtaps[k]); |
| #endif |
| |
| EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]); |
| EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]); |
| EffectiveDETBufferSizeY = DETBufferSizeY[k]; |
| |
| if (UnboundedRequestEnabled) { |
| EffectiveDETBufferSizeY = EffectiveDETBufferSizeY |
| + CompressedBufferSizeInkByte * 1024 |
| * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k]) |
| / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW; |
| } |
| |
| LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; |
| LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); |
| FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k]; |
| |
| ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY |
| - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k]; |
| |
| if (v->NumberOfActiveSurfaces > 1) { |
| ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY |
| - (1.0 - 1.0 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k] |
| / v->PixelClock[k] / v->VRatio[k]; |
| } |
| |
| if (BytePerPixelDETC[k] > 0) { |
| LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; |
| LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]); |
| FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) |
| / v->VRatioChroma[k]; |
| ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC |
| - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] |
| / v->PixelClock[k]; |
| if (v->NumberOfActiveSurfaces > 1) { |
| ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC |
| - (1 - 1 / v->NumberOfActiveSurfaces) * SwathHeightC[k] * v->HTotal[k] |
| / v->PixelClock[k] / v->VRatioChroma[k]; |
| } |
| ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY, |
| ActiveClockChangeLatencyHidingC); |
| } else { |
| ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY; |
| } |
| |
| ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark |
| - v->Watermark.DRAMClockChangeWatermark; |
| ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark |
| - v->Watermark.FCLKChangeWatermark; |
| USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.USRRetrainingWatermark; |
| |
| if (v->WritebackEnable[k]) { |
| WritebackLatencyHiding = v->WritebackInterfaceBufferSize * 1024 |
| / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] |
| / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4); |
| if (v->WritebackPixelFormat[k] == dm_444_64) |
| WritebackLatencyHiding = WritebackLatencyHiding / 2; |
| |
| WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding |
| - v->Watermark.WritebackDRAMClockChangeWatermark; |
| |
| WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding |
| - v->Watermark.WritebackFCLKChangeWatermark; |
| |
| ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k], |
| WritebackFCLKChangeLatencyMargin); |
| ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k], |
| WritebackDRAMClockChangeLatencyMargin); |
| } |
| MaxActiveDRAMClockChangeLatencySupported[k] = |
| (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ? |
| 0 : |
| (ActiveDRAMClockChangeLatencyMargin[k] |
| + mmSOCParameters.DRAMClockChangeLatency); |
| } |
| |
| for (i = 0; i < v->NumberOfActiveSurfaces; ++i) { |
| for (j = 0; j < v->NumberOfActiveSurfaces; ++j) { |
| if (i == j || |
| (v->BlendingAndTiming[i] == i && v->BlendingAndTiming[j] == i) || |
| (v->BlendingAndTiming[j] == j && v->BlendingAndTiming[i] == j) || |
| (v->BlendingAndTiming[i] == v->BlendingAndTiming[j] && v->BlendingAndTiming[i] != i) || |
| (v->SynchronizeTimingsFinal && v->PixelClock[i] == v->PixelClock[j] && |
| v->HTotal[i] == v->HTotal[j] && v->VTotal[i] == v->VTotal[j] && |
| v->VActive[i] == v->VActive[j]) || (v->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && |
| (v->DRRDisplay[i] || v->DRRDisplay[j]))) { |
| SynchronizedSurfaces[i][j] = true; |
| } else { |
| SynchronizedSurfaces[i][j] = false; |
| } |
| } |
| } |
| |
| for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { |
| if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && |
| (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin || |
| ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) { |
| FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true; |
| MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k]; |
| SurfaceWithMinActiveFCLKChangeMargin = k; |
| } |
| } |
| |
| *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency; |
| |
| SameTimingForFCLKChange = true; |
| for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { |
| if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) { |
| if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && |
| (SameTimingForFCLKChange || |
| ActiveFCLKChangeLatencyMargin[k] < |
| SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) { |
| SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k]; |
| } |
| SameTimingForFCLKChange = false; |
| } |
| } |
| |
| if (MinActiveFCLKChangeMargin > 0) { |
| *FCLKChangeSupport = dm_fclock_change_vactive; |
| } else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) && |
| (PrefetchMode <= 1)) { |
| *FCLKChangeSupport = dm_fclock_change_vblank; |
| } else { |
| *FCLKChangeSupport = dm_fclock_change_unsupported; |
| } |
| |
| *USRRetrainingSupport = true; |
| for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { |
| if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && |
| (USRRetrainingLatencyMargin[k] < 0)) { |
| *USRRetrainingSupport = false; |
| } |
| } |
| |
| for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { |
| if (v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame && |
| v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport && |
| v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe && |
| ActiveDRAMClockChangeLatencyMargin[k] < 0) { |
| if (PrefetchMode > 0) { |
| DRAMClockChangeSupportNumber = 2; |
| } else if (DRAMClockChangeSupportNumber == 0) { |
| DRAMClockChangeSupportNumber = 1; |
| LastSurfaceWithoutMargin = k; |
| } else if (DRAMClockChangeSupportNumber == 1 && |
| !SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) { |
| DRAMClockChangeSupportNumber = 2; |
| } |
| } |
| } |
| |
| for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { |
| if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame) |
| DRAMClockChangeMethod = 1; |
| else if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) |
| DRAMClockChangeMethod = 2; |
| } |
| |
| if (DRAMClockChangeMethod == 0) { |
| if (DRAMClockChangeSupportNumber == 0) |
| *DRAMClockChangeSupport = dm_dram_clock_change_vactive; |
| else if (DRAMClockChangeSupportNumber == 1) |
| *DRAMClockChangeSupport = dm_dram_clock_change_vblank; |
| else |
| *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; |
| } else if (DRAMClockChangeMethod == 1) { |
| if (DRAMClockChangeSupportNumber == 0) |
| *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame; |
| else if (DRAMClockChangeSupportNumber == 1) |
| *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame; |
| else |
| *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; |
| } else { |
| if (DRAMClockChangeSupportNumber == 0) |
| *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp; |
| else if (DRAMClockChangeSupportNumber == 1) |
| *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp; |
| else |
| *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; |
| } |
| |
| for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { |
| unsigned int dst_y_pstate; |
| unsigned int src_y_pstate_l; |
| unsigned int src_y_pstate_c; |
| unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c; |
| |
| dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (v->HTotal[k] / v->PixelClock[k]), 1); |
| src_y_pstate_l = dml_ceil(dst_y_pstate * v->VRatio[k], SwathHeightY[k]); |
| src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k]; |
| sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + v->meta_row_height[k]; |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); |
| dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); |
| dml_print("DML::%s: k=%d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]); |
| dml_print("DML::%s: k=%d, SwathHeightY = %d\n", __func__, k, SwathHeightY[k]); |
| dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]); |
| dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate); |
| dml_print("DML::%s: k=%d, src_y_pstate_l = %d\n", __func__, k, src_y_pstate_l); |
| dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l); |
| dml_print("DML::%s: k=%d, v->meta_row_height = %d\n", __func__, k, v->meta_row_height[k]); |
| dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l); |
| #endif |
| SubViewportLinesNeededInMALL[k] = sub_vp_lines_l; |
| |
| if (BytePerPixelDETC[k] > 0) { |
| src_y_pstate_c = dml_ceil(dst_y_pstate * v->VRatioChroma[k], SwathHeightC[k]); |
| src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k]; |
| sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + v->meta_row_height_chroma[k]; |
| SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c); |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%d, src_y_pstate_c = %d\n", __func__, k, src_y_pstate_c); |
| dml_print("DML::%s: k=%d, src_y_ahead_c = %d\n", __func__, k, src_y_ahead_c); |
| dml_print("DML::%s: k=%d, v->meta_row_height_chroma = %d\n", __func__, k, v->meta_row_height_chroma[k]); |
| dml_print("DML::%s: k=%d, sub_vp_lines_c = %d\n", __func__, k, sub_vp_lines_c); |
| #endif |
| } |
| } |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport); |
| dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport); |
| dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n", |
| __func__, *MinActiveFCLKChangeLatencySupported); |
| dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport); |
| #endif |
| } // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport |
| |
| double dml32_CalculateWriteBackDISPCLK( |
| enum source_format_class WritebackPixelFormat, |
| double PixelClock, |
| double WritebackHRatio, |
| double WritebackVRatio, |
| unsigned int WritebackHTaps, |
| unsigned int WritebackVTaps, |
| unsigned int WritebackSourceWidth, |
| unsigned int WritebackDestinationWidth, |
| unsigned int HTotal, |
| unsigned int WritebackLineBufferSize, |
| double DISPCLKDPPCLKVCOSpeed) |
| { |
| double DISPCLK_H, DISPCLK_V, DISPCLK_HB; |
| |
| DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio; |
| DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal; |
| DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * |
| WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth; |
| return dml32_RoundToDFSGranularity(dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), 1, DISPCLKDPPCLKVCOSpeed); |
| } |
| |
| void dml32_CalculateMinAndMaxPrefetchMode( |
| enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal, |
| unsigned int *MinPrefetchMode, |
| unsigned int *MaxPrefetchMode) |
| { |
| if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) { |
| *MinPrefetchMode = 3; |
| *MaxPrefetchMode = 3; |
| } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) { |
| *MinPrefetchMode = 2; |
| *MaxPrefetchMode = 2; |
| } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) { |
| *MinPrefetchMode = 1; |
| *MaxPrefetchMode = 1; |
| } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) { |
| *MinPrefetchMode = 0; |
| *MaxPrefetchMode = 0; |
| } else { |
| *MinPrefetchMode = 0; |
| *MaxPrefetchMode = 3; |
| } |
| } // CalculateMinAndMaxPrefetchMode |
| |
| void dml32_CalculatePixelDeliveryTimes( |
| unsigned int NumberOfActiveSurfaces, |
| double VRatio[], |
| double VRatioChroma[], |
| double VRatioPrefetchY[], |
| double VRatioPrefetchC[], |
| unsigned int swath_width_luma_ub[], |
| unsigned int swath_width_chroma_ub[], |
| unsigned int DPPPerSurface[], |
| double HRatio[], |
| double HRatioChroma[], |
| double PixelClock[], |
| double PSCL_THROUGHPUT[], |
| double PSCL_THROUGHPUT_CHROMA[], |
| double Dppclk[], |
| unsigned int BytePerPixelC[], |
| enum dm_rotation_angle SourceRotation[], |
| unsigned int NumberOfCursors[], |
| unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], |
| unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], |
| unsigned int BlockWidth256BytesY[], |
| unsigned int BlockHeight256BytesY[], |
| unsigned int BlockWidth256BytesC[], |
| unsigned int BlockHeight256BytesC[], |
| |
| /* Output */ |
| double DisplayPipeLineDeliveryTimeLuma[], |
| double DisplayPipeLineDeliveryTimeChroma[], |
| double DisplayPipeLineDeliveryTimeLumaPrefetch[], |
| double DisplayPipeLineDeliveryTimeChromaPrefetch[], |
| double DisplayPipeRequestDeliveryTimeLuma[], |
| double DisplayPipeRequestDeliveryTimeChroma[], |
| double DisplayPipeRequestDeliveryTimeLumaPrefetch[], |
| double DisplayPipeRequestDeliveryTimeChromaPrefetch[], |
| double CursorRequestDeliveryTime[], |
| double CursorRequestDeliveryTimePrefetch[]) |
| { |
| double req_per_swath_ub; |
| unsigned int k; |
| |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]); |
| dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]); |
| dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]); |
| dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]); |
| dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]); |
| dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]); |
| dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]); |
| dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]); |
| dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); |
| dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]); |
| dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]); |
| #endif |
| |
| if (VRatio[k] <= 1) { |
| DisplayPipeLineDeliveryTimeLuma[k] = |
| swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k]; |
| } else { |
| DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; |
| } |
| |
| if (BytePerPixelC[k] == 0) { |
| DisplayPipeLineDeliveryTimeChroma[k] = 0; |
| } else { |
| if (VRatioChroma[k] <= 1) { |
| DisplayPipeLineDeliveryTimeChroma[k] = |
| swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; |
| } else { |
| DisplayPipeLineDeliveryTimeChroma[k] = |
| swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; |
| } |
| } |
| |
| if (VRatioPrefetchY[k] <= 1) { |
| DisplayPipeLineDeliveryTimeLumaPrefetch[k] = |
| swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k]; |
| } else { |
| DisplayPipeLineDeliveryTimeLumaPrefetch[k] = |
| swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; |
| } |
| |
| if (BytePerPixelC[k] == 0) { |
| DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; |
| } else { |
| if (VRatioPrefetchC[k] <= 1) { |
| DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * |
| DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; |
| } else { |
| DisplayPipeLineDeliveryTimeChromaPrefetch[k] = |
| swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; |
| } |
| } |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", |
| __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); |
| dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", |
| __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); |
| dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", |
| __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); |
| dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", |
| __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); |
| #endif |
| } |
| |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| if (!IsVertical(SourceRotation[k])) |
| req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k]; |
| else |
| req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k]; |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub); |
| #endif |
| |
| DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub; |
| DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = |
| DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub; |
| if (BytePerPixelC[k] == 0) { |
| DisplayPipeRequestDeliveryTimeChroma[k] = 0; |
| DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0; |
| } else { |
| if (!IsVertical(SourceRotation[k])) |
| req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k]; |
| else |
| req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k]; |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub); |
| #endif |
| DisplayPipeRequestDeliveryTimeChroma[k] = |
| DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub; |
| DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = |
| DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub; |
| } |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", |
| __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); |
| dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", |
| __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); |
| dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", |
| __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); |
| dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", |
| __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); |
| #endif |
| } |
| |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| unsigned int cursor_req_per_width; |
| |
| cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] / |
| 256.0 / 8.0, 1.0); |
| if (NumberOfCursors[k] > 0) { |
| if (VRatio[k] <= 1) { |
| CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] / |
| HRatio[k] / PixelClock[k] / cursor_req_per_width; |
| } else { |
| CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] / |
| PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width; |
| } |
| if (VRatioPrefetchY[k] <= 1) { |
| CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] / |
| HRatio[k] / PixelClock[k] / cursor_req_per_width; |
| } else { |
| CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] / |
| PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width; |
| } |
| } else { |
| CursorRequestDeliveryTime[k] = 0; |
| CursorRequestDeliveryTimePrefetch[k] = 0; |
| } |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", |
| __func__, k, NumberOfCursors[k]); |
| dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", |
| __func__, k, CursorRequestDeliveryTime[k]); |
| dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", |
| __func__, k, CursorRequestDeliveryTimePrefetch[k]); |
| #endif |
| } |
| } // CalculatePixelDeliveryTimes |
| |
| void dml32_CalculateMetaAndPTETimes( |
| bool use_one_row_for_frame[], |
| unsigned int NumberOfActiveSurfaces, |
| bool GPUVMEnable, |
| unsigned int MetaChunkSize, |
| unsigned int MinMetaChunkSizeBytes, |
| unsigned int HTotal[], |
| double VRatio[], |
| double VRatioChroma[], |
| double DestinationLinesToRequestRowInVBlank[], |
| double DestinationLinesToRequestRowInImmediateFlip[], |
| bool DCCEnable[], |
| double PixelClock[], |
| unsigned int BytePerPixelY[], |
| unsigned int BytePerPixelC[], |
| enum dm_rotation_angle SourceRotation[], |
| unsigned int dpte_row_height[], |
| unsigned int dpte_row_height_chroma[], |
| unsigned int meta_row_width[], |
| unsigned int meta_row_width_chroma[], |
| unsigned int meta_row_height[], |
| unsigned int meta_row_height_chroma[], |
| unsigned int meta_req_width[], |
| unsigned int meta_req_width_chroma[], |
| unsigned int meta_req_height[], |
| unsigned int meta_req_height_chroma[], |
| unsigned int dpte_group_bytes[], |
| unsigned int PTERequestSizeY[], |
| unsigned int PTERequestSizeC[], |
| unsigned int PixelPTEReqWidthY[], |
| unsigned int PixelPTEReqHeightY[], |
| unsigned int PixelPTEReqWidthC[], |
| unsigned int PixelPTEReqHeightC[], |
| unsigned int dpte_row_width_luma_ub[], |
| unsigned int dpte_row_width_chroma_ub[], |
| |
| /* Output */ |
| double DST_Y_PER_PTE_ROW_NOM_L[], |
| double DST_Y_PER_PTE_ROW_NOM_C[], |
| double DST_Y_PER_META_ROW_NOM_L[], |
| double DST_Y_PER_META_ROW_NOM_C[], |
| double TimePerMetaChunkNominal[], |
| double TimePerChromaMetaChunkNominal[], |
| double TimePerMetaChunkVBlank[], |
| double TimePerChromaMetaChunkVBlank[], |
| double TimePerMetaChunkFlip[], |
| double TimePerChromaMetaChunkFlip[], |
| double time_per_pte_group_nom_luma[], |
| double time_per_pte_group_vblank_luma[], |
| double time_per_pte_group_flip_luma[], |
| double time_per_pte_group_nom_chroma[], |
| double time_per_pte_group_vblank_chroma[], |
| double time_per_pte_group_flip_chroma[]) |
| { |
| unsigned int meta_chunk_width; |
| unsigned int min_meta_chunk_width; |
| unsigned int meta_chunk_per_row_int; |
| unsigned int meta_row_remainder; |
| unsigned int meta_chunk_threshold; |
| unsigned int meta_chunks_per_row_ub; |
| unsigned int meta_chunk_width_chroma; |
| unsigned int min_meta_chunk_width_chroma; |
| unsigned int meta_chunk_per_row_int_chroma; |
| unsigned int meta_row_remainder_chroma; |
| unsigned int meta_chunk_threshold_chroma; |
| unsigned int meta_chunks_per_row_ub_chroma; |
| unsigned int dpte_group_width_luma; |
| unsigned int dpte_groups_per_row_luma_ub; |
| unsigned int dpte_group_width_chroma; |
| unsigned int dpte_groups_per_row_chroma_ub; |
| unsigned int k; |
| |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k]; |
| if (BytePerPixelC[k] == 0) |
| DST_Y_PER_PTE_ROW_NOM_C[k] = 0; |
| else |
| DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k]; |
| DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k]; |
| if (BytePerPixelC[k] == 0) |
| DST_Y_PER_META_ROW_NOM_C[k] = 0; |
| else |
| DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k]; |
| } |
| |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| if (DCCEnable[k] == true) { |
| meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k]; |
| min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k]; |
| meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width; |
| meta_row_remainder = meta_row_width[k] % meta_chunk_width; |
| if (!IsVertical(SourceRotation[k])) |
| meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k]; |
| else |
| meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k]; |
| |
| if (meta_row_remainder <= meta_chunk_threshold) |
| meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; |
| else |
| meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; |
| |
| TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * |
| HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; |
| TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * |
| HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; |
| TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * |
| HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; |
| if (BytePerPixelC[k] == 0) { |
| TimePerChromaMetaChunkNominal[k] = 0; |
| TimePerChromaMetaChunkVBlank[k] = 0; |
| TimePerChromaMetaChunkFlip[k] = 0; |
| } else { |
| meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / |
| meta_row_height_chroma[k]; |
| min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / |
| meta_row_height_chroma[k]; |
| meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / |
| meta_chunk_width_chroma; |
| meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma; |
| if (!IsVertical(SourceRotation[k])) { |
| meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - |
| meta_req_width_chroma[k]; |
| } else { |
| meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - |
| meta_req_height_chroma[k]; |
| } |
| if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) |
| meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1; |
| else |
| meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2; |
| |
| TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * |
| HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; |
| TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * |
| HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; |
| TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * |
| HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; |
| } |
| } else { |
| TimePerMetaChunkNominal[k] = 0; |
| TimePerMetaChunkVBlank[k] = 0; |
| TimePerMetaChunkFlip[k] = 0; |
| TimePerChromaMetaChunkNominal[k] = 0; |
| TimePerChromaMetaChunkVBlank[k] = 0; |
| TimePerChromaMetaChunkFlip[k] = 0; |
| } |
| } |
| |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| if (GPUVMEnable == true) { |
| if (!IsVertical(SourceRotation[k])) { |
| dpte_group_width_luma = (double) dpte_group_bytes[k] / |
| (double) PTERequestSizeY[k] * PixelPTEReqWidthY[k]; |
| } else { |
| dpte_group_width_luma = (double) dpte_group_bytes[k] / |
| (double) PTERequestSizeY[k] * PixelPTEReqHeightY[k]; |
| } |
| |
| if (use_one_row_for_frame[k]) { |
| dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] / |
| (double) dpte_group_width_luma / 2.0, 1.0); |
| } else { |
| dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] / |
| (double) dpte_group_width_luma, 1.0); |
| } |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%0d, use_one_row_for_frame = %d\n", |
| __func__, k, use_one_row_for_frame[k]); |
| dml_print("DML::%s: k=%0d, dpte_group_bytes = %d\n", |
| __func__, k, dpte_group_bytes[k]); |
| dml_print("DML::%s: k=%0d, PTERequestSizeY = %d\n", |
| __func__, k, PTERequestSizeY[k]); |
| dml_print("DML::%s: k=%0d, PixelPTEReqWidthY = %d\n", |
| __func__, k, PixelPTEReqWidthY[k]); |
| dml_print("DML::%s: k=%0d, PixelPTEReqHeightY = %d\n", |
| __func__, k, PixelPTEReqHeightY[k]); |
| dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub = %d\n", |
| __func__, k, dpte_row_width_luma_ub[k]); |
| dml_print("DML::%s: k=%0d, dpte_group_width_luma = %d\n", |
| __func__, k, dpte_group_width_luma); |
| dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub = %d\n", |
| __func__, k, dpte_groups_per_row_luma_ub); |
| #endif |
| |
| time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * |
| HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; |
| time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * |
| HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; |
| time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * |
| HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; |
| if (BytePerPixelC[k] == 0) { |
| time_per_pte_group_nom_chroma[k] = 0; |
| time_per_pte_group_vblank_chroma[k] = 0; |
| time_per_pte_group_flip_chroma[k] = 0; |
| } else { |
| if (!IsVertical(SourceRotation[k])) { |
| dpte_group_width_chroma = (double) dpte_group_bytes[k] / |
| (double) PTERequestSizeC[k] * PixelPTEReqWidthC[k]; |
| } else { |
| dpte_group_width_chroma = (double) dpte_group_bytes[k] / |
| (double) PTERequestSizeC[k] * PixelPTEReqHeightC[k]; |
| } |
| |
| if (use_one_row_for_frame[k]) { |
| dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] / |
| (double) dpte_group_width_chroma / 2.0, 1.0); |
| } else { |
| dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] / |
| (double) dpte_group_width_chroma, 1.0); |
| } |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub = %d\n", |
| __func__, k, dpte_row_width_chroma_ub[k]); |
| dml_print("DML::%s: k=%0d, dpte_group_width_chroma = %d\n", |
| __func__, k, dpte_group_width_chroma); |
| dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub = %d\n", |
| __func__, k, dpte_groups_per_row_chroma_ub); |
| #endif |
| time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * |
| HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; |
| time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * |
| HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; |
| time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * |
| HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; |
| } |
| } else { |
| time_per_pte_group_nom_luma[k] = 0; |
| time_per_pte_group_vblank_luma[k] = 0; |
| time_per_pte_group_flip_luma[k] = 0; |
| time_per_pte_group_nom_chroma[k] = 0; |
| time_per_pte_group_vblank_chroma[k] = 0; |
| time_per_pte_group_flip_chroma[k] = 0; |
| } |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank = %f\n", |
| __func__, k, DestinationLinesToRequestRowInVBlank[k]); |
| dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip = %f\n", |
| __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]); |
| dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L = %f\n", |
| __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]); |
| dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C = %f\n", |
| __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]); |
| dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L = %f\n", |
| __func__, k, DST_Y_PER_META_ROW_NOM_L[k]); |
| dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C = %f\n", |
| __func__, k, DST_Y_PER_META_ROW_NOM_C[k]); |
| dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal = %f\n", |
| __func__, k, TimePerMetaChunkNominal[k]); |
| dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank = %f\n", |
| __func__, k, TimePerMetaChunkVBlank[k]); |
| dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip = %f\n", |
| __func__, k, TimePerMetaChunkFlip[k]); |
| dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal = %f\n", |
| __func__, k, TimePerChromaMetaChunkNominal[k]); |
| dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank = %f\n", |
| __func__, k, TimePerChromaMetaChunkVBlank[k]); |
| dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip = %f\n", |
| __func__, k, TimePerChromaMetaChunkFlip[k]); |
| dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma = %f\n", |
| __func__, k, time_per_pte_group_nom_luma[k]); |
| dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma = %f\n", |
| __func__, k, time_per_pte_group_vblank_luma[k]); |
| dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma = %f\n", |
| __func__, k, time_per_pte_group_flip_luma[k]); |
| dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma = %f\n", |
| __func__, k, time_per_pte_group_nom_chroma[k]); |
| dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n", |
| __func__, k, time_per_pte_group_vblank_chroma[k]); |
| dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma = %f\n", |
| __func__, k, time_per_pte_group_flip_chroma[k]); |
| #endif |
| } |
| } // CalculateMetaAndPTETimes |
| |
| void dml32_CalculateVMGroupAndRequestTimes( |
| unsigned int NumberOfActiveSurfaces, |
| bool GPUVMEnable, |
| unsigned int GPUVMMaxPageTableLevels, |
| unsigned int HTotal[], |
| unsigned int BytePerPixelC[], |
| double DestinationLinesToRequestVMInVBlank[], |
| double DestinationLinesToRequestVMInImmediateFlip[], |
| bool DCCEnable[], |
| double PixelClock[], |
| unsigned int dpte_row_width_luma_ub[], |
| unsigned int dpte_row_width_chroma_ub[], |
| unsigned int vm_group_bytes[], |
| unsigned int dpde0_bytes_per_frame_ub_l[], |
| unsigned int dpde0_bytes_per_frame_ub_c[], |
| unsigned int meta_pte_bytes_per_frame_ub_l[], |
| unsigned int meta_pte_bytes_per_frame_ub_c[], |
| |
| /* Output */ |
| double TimePerVMGroupVBlank[], |
| double TimePerVMGroupFlip[], |
| double TimePerVMRequestVBlank[], |
| double TimePerVMRequestFlip[]) |
| { |
| unsigned int k; |
| unsigned int num_group_per_lower_vm_stage; |
| unsigned int num_req_per_lower_vm_stage; |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces); |
| dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); |
| #endif |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]); |
| dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]); |
| dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n", |
| __func__, k, dpde0_bytes_per_frame_ub_l[k]); |
| dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n", |
| __func__, k, dpde0_bytes_per_frame_ub_c[k]); |
| dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n", |
| __func__, k, meta_pte_bytes_per_frame_ub_l[k]); |
| dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n", |
| __func__, k, meta_pte_bytes_per_frame_ub_c[k]); |
| #endif |
| |
| if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) { |
| if (DCCEnable[k] == false) { |
| if (BytePerPixelC[k] > 0) { |
| num_group_per_lower_vm_stage = dml_ceil( |
| (double) (dpde0_bytes_per_frame_ub_l[k]) / |
| (double) (vm_group_bytes[k]), 1.0) + |
| dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / |
| (double) (vm_group_bytes[k]), 1.0); |
| } else { |
| num_group_per_lower_vm_stage = dml_ceil( |
| (double) (dpde0_bytes_per_frame_ub_l[k]) / |
| (double) (vm_group_bytes[k]), 1.0); |
| } |
| } else { |
| if (GPUVMMaxPageTableLevels == 1) { |
| if (BytePerPixelC[k] > 0) { |
| num_group_per_lower_vm_stage = dml_ceil( |
| (double) (meta_pte_bytes_per_frame_ub_l[k]) / |
| (double) (vm_group_bytes[k]), 1.0) + |
| dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / |
| (double) (vm_group_bytes[k]), 1.0); |
| } else { |
| num_group_per_lower_vm_stage = dml_ceil( |
| (double) (meta_pte_bytes_per_frame_ub_l[k]) / |
| (double) (vm_group_bytes[k]), 1.0); |
| } |
| } else { |
| if (BytePerPixelC[k] > 0) { |
| num_group_per_lower_vm_stage = 2 + dml_ceil( |
| (double) (dpde0_bytes_per_frame_ub_l[k]) / |
| (double) (vm_group_bytes[k]), 1) + |
| dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / |
| (double) (vm_group_bytes[k]), 1) + |
| dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / |
| (double) (vm_group_bytes[k]), 1) + |
| dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / |
| (double) (vm_group_bytes[k]), 1); |
| } else { |
| num_group_per_lower_vm_stage = 1 + dml_ceil( |
| (double) (dpde0_bytes_per_frame_ub_l[k]) / |
| (double) (vm_group_bytes[k]), 1) + dml_ceil( |
| (double) (meta_pte_bytes_per_frame_ub_l[k]) / |
| (double) (vm_group_bytes[k]), 1); |
| } |
| } |
| } |
| |
| if (DCCEnable[k] == false) { |
| if (BytePerPixelC[k] > 0) { |
| num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + |
| dpde0_bytes_per_frame_ub_c[k] / 64; |
| } else { |
| num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64; |
| } |
| } else { |
| if (GPUVMMaxPageTableLevels == 1) { |
| if (BytePerPixelC[k] > 0) { |
| num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + |
| meta_pte_bytes_per_frame_ub_c[k] / 64; |
| } else { |
| num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64; |
| } |
| } else { |
| if (BytePerPixelC[k] > 0) { |
| num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / |
| 64 + dpde0_bytes_per_frame_ub_c[k] / 64 + |
| meta_pte_bytes_per_frame_ub_l[k] / 64 + |
| meta_pte_bytes_per_frame_ub_c[k] / 64; |
| } else { |
| num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / |
| 64 + meta_pte_bytes_per_frame_ub_l[k] / 64; |
| } |
| } |
| } |
| |
| TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * |
| HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; |
| TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * |
| HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; |
| TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * |
| HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; |
| TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * |
| HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; |
| |
| if (GPUVMMaxPageTableLevels > 2) { |
| TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2; |
| TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2; |
| TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2; |
| TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2; |
| } |
| |
| } else { |
| TimePerVMGroupVBlank[k] = 0; |
| TimePerVMGroupFlip[k] = 0; |
| TimePerVMRequestVBlank[k] = 0; |
| TimePerVMRequestFlip[k] = 0; |
| } |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]); |
| dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]); |
| dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]); |
| dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]); |
| #endif |
| } |
| } // CalculateVMGroupAndRequestTimes |
| |
| void dml32_CalculateDCCConfiguration( |
| bool DCCEnabled, |
| bool DCCProgrammingAssumesScanDirectionUnknown, |
| enum source_format_class SourcePixelFormat, |
| unsigned int SurfaceWidthLuma, |
| unsigned int SurfaceWidthChroma, |
| unsigned int SurfaceHeightLuma, |
| unsigned int SurfaceHeightChroma, |
| unsigned int nomDETInKByte, |
| unsigned int RequestHeight256ByteLuma, |
| unsigned int RequestHeight256ByteChroma, |
| enum dm_swizzle_mode TilingFormat, |
| unsigned int BytePerPixelY, |
| unsigned int BytePerPixelC, |
| double BytePerPixelDETY, |
| double BytePerPixelDETC, |
| enum dm_rotation_angle SourceRotation, |
| /* Output */ |
| unsigned int *MaxUncompressedBlockLuma, |
| unsigned int *MaxUncompressedBlockChroma, |
| unsigned int *MaxCompressedBlockLuma, |
| unsigned int *MaxCompressedBlockChroma, |
| unsigned int *IndependentBlockLuma, |
| unsigned int *IndependentBlockChroma) |
| { |
| typedef enum { |
| REQ_256Bytes, |
| REQ_128BytesNonContiguous, |
| REQ_128BytesContiguous, |
| REQ_NA |
| } RequestType; |
| |
| RequestType RequestLuma; |
| RequestType RequestChroma; |
| |
| unsigned int segment_order_horz_contiguous_luma; |
| unsigned int segment_order_horz_contiguous_chroma; |
| unsigned int segment_order_vert_contiguous_luma; |
| unsigned int segment_order_vert_contiguous_chroma; |
| unsigned int req128_horz_wc_l; |
| unsigned int req128_horz_wc_c; |
| unsigned int req128_vert_wc_l; |
| unsigned int req128_vert_wc_c; |
| unsigned int MAS_vp_horz_limit; |
| unsigned int MAS_vp_vert_limit; |
| unsigned int max_vp_horz_width; |
| unsigned int max_vp_vert_height; |
| unsigned int eff_surf_width_l; |
| unsigned int eff_surf_width_c; |
| unsigned int eff_surf_height_l; |
| unsigned int eff_surf_height_c; |
| unsigned int full_swath_bytes_horz_wc_l; |
| unsigned int full_swath_bytes_horz_wc_c; |
| unsigned int full_swath_bytes_vert_wc_l; |
| unsigned int full_swath_bytes_vert_wc_c; |
| unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024; |
| |
| unsigned int yuv420; |
| unsigned int horz_div_l; |
| unsigned int horz_div_c; |
| unsigned int vert_div_l; |
| unsigned int vert_div_c; |
| |
| unsigned int swath_buf_size; |
| double detile_buf_vp_horz_limit; |
| double detile_buf_vp_vert_limit; |
| |
| yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || |
| SourcePixelFormat == dm_420_12) ? 1 : 0); |
| horz_div_l = 1; |
| horz_div_c = 1; |
| vert_div_l = 1; |
| vert_div_c = 1; |
| |
| if (BytePerPixelY == 1) |
| vert_div_l = 0; |
| if (BytePerPixelC == 1) |
| vert_div_c = 0; |
| |
| if (BytePerPixelC == 0) { |
| swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256; |
| detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * |
| BytePerPixelY / (1 + horz_div_l)); |
| detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / |
| (1 + vert_div_l)); |
| } else { |
| swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256; |
| detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * |
| BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma * |
| BytePerPixelC / (1 + horz_div_c) / (1 + yuv420)); |
| detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / |
| (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / |
| (1 + vert_div_c) / (1 + yuv420)); |
| } |
| |
| if (SourcePixelFormat == dm_420_10) { |
| detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit; |
| detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit; |
| } |
| |
| detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16); |
| detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16); |
| |
| MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144; |
| MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144); |
| max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit); |
| max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit); |
| eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma); |
| eff_surf_width_c = eff_surf_width_l / (1 + yuv420); |
| eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma); |
| eff_surf_height_c = eff_surf_height_l / (1 + yuv420); |
| |
| full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY; |
| full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma; |
| if (BytePerPixelC > 0) { |
| full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC; |
| full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma; |
| } else { |
| full_swath_bytes_horz_wc_c = 0; |
| full_swath_bytes_vert_wc_c = 0; |
| } |
| |
| if (SourcePixelFormat == dm_420_10) { |
| full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0); |
| full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0); |
| full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0); |
| full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0); |
| } |
| |
| if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { |
| req128_horz_wc_l = 0; |
| req128_horz_wc_c = 0; |
| } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + |
| full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { |
| req128_horz_wc_l = 0; |
| req128_horz_wc_c = 1; |
| } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * |
| full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { |
| req128_horz_wc_l = 1; |
| req128_horz_wc_c = 0; |
| } else { |
| req128_horz_wc_l = 1; |
| req128_horz_wc_c = 1; |
| } |
| |
| if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { |
| req128_vert_wc_l = 0; |
| req128_vert_wc_c = 0; |
| } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * |
| full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { |
| req128_vert_wc_l = 0; |
| req128_vert_wc_c = 1; |
| } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && |
| full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { |
| req128_vert_wc_l = 1; |
| req128_vert_wc_c = 0; |
| } else { |
| req128_vert_wc_l = 1; |
| req128_vert_wc_c = 1; |
| } |
| |
| if (BytePerPixelY == 2) { |
| segment_order_horz_contiguous_luma = 0; |
| segment_order_vert_contiguous_luma = 1; |
| } else { |
| segment_order_horz_contiguous_luma = 1; |
| segment_order_vert_contiguous_luma = 0; |
| } |
| |
| if (BytePerPixelC == 2) { |
| segment_order_horz_contiguous_chroma = 0; |
| segment_order_vert_contiguous_chroma = 1; |
| } else { |
| segment_order_horz_contiguous_chroma = 1; |
| segment_order_vert_contiguous_chroma = 0; |
| } |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled); |
| dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); |
| dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC); |
| dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l); |
| dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c); |
| dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l); |
| dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c); |
| dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma); |
| dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n", |
| __func__, segment_order_horz_contiguous_chroma); |
| #endif |
| |
| if (DCCProgrammingAssumesScanDirectionUnknown == true) { |
| if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) |
| RequestLuma = REQ_256Bytes; |
| else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || |
| (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) |
| RequestLuma = REQ_128BytesNonContiguous; |
| else |
| RequestLuma = REQ_128BytesContiguous; |
| |
| if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) |
| RequestChroma = REQ_256Bytes; |
| else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || |
| (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) |
| RequestChroma = REQ_128BytesNonContiguous; |
| else |
| RequestChroma = REQ_128BytesContiguous; |
| |
| } else if (!IsVertical(SourceRotation)) { |
| if (req128_horz_wc_l == 0) |
| RequestLuma = REQ_256Bytes; |
| else if (segment_order_horz_contiguous_luma == 0) |
| RequestLuma = REQ_128BytesNonContiguous; |
| else |
| RequestLuma = REQ_128BytesContiguous; |
| |
| if (req128_horz_wc_c == 0) |
| RequestChroma = REQ_256Bytes; |
| else if (segment_order_horz_contiguous_chroma == 0) |
| RequestChroma = REQ_128BytesNonContiguous; |
| else |
| RequestChroma = REQ_128BytesContiguous; |
| |
| } else { |
| if (req128_vert_wc_l == 0) |
| RequestLuma = REQ_256Bytes; |
| else if (segment_order_vert_contiguous_luma == 0) |
| RequestLuma = REQ_128BytesNonContiguous; |
| else |
| RequestLuma = REQ_128BytesContiguous; |
| |
| if (req128_vert_wc_c == 0) |
| RequestChroma = REQ_256Bytes; |
| else if (segment_order_vert_contiguous_chroma == 0) |
| RequestChroma = REQ_128BytesNonContiguous; |
| else |
| RequestChroma = REQ_128BytesContiguous; |
| } |
| |
| if (RequestLuma == REQ_256Bytes) { |
| *MaxUncompressedBlockLuma = 256; |
| *MaxCompressedBlockLuma = 256; |
| *IndependentBlockLuma = 0; |
| } else if (RequestLuma == REQ_128BytesContiguous) { |
| *MaxUncompressedBlockLuma = 256; |
| *MaxCompressedBlockLuma = 128; |
| *IndependentBlockLuma = 128; |
| } else { |
| *MaxUncompressedBlockLuma = 256; |
| *MaxCompressedBlockLuma = 64; |
| *IndependentBlockLuma = 64; |
| } |
| |
| if (RequestChroma == REQ_256Bytes) { |
| *MaxUncompressedBlockChroma = 256; |
| *MaxCompressedBlockChroma = 256; |
| *IndependentBlockChroma = 0; |
| } else if (RequestChroma == REQ_128BytesContiguous) { |
| *MaxUncompressedBlockChroma = 256; |
| *MaxCompressedBlockChroma = 128; |
| *IndependentBlockChroma = 128; |
| } else { |
| *MaxUncompressedBlockChroma = 256; |
| *MaxCompressedBlockChroma = 64; |
| *IndependentBlockChroma = 64; |
| } |
| |
| if (DCCEnabled != true || BytePerPixelC == 0) { |
| *MaxUncompressedBlockChroma = 0; |
| *MaxCompressedBlockChroma = 0; |
| *IndependentBlockChroma = 0; |
| } |
| |
| if (DCCEnabled != true) { |
| *MaxUncompressedBlockLuma = 0; |
| *MaxCompressedBlockLuma = 0; |
| *IndependentBlockLuma = 0; |
| } |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma); |
| dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma); |
| dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma); |
| dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma); |
| dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma); |
| dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma); |
| #endif |
| |
| } // CalculateDCCConfiguration |
| |
| void dml32_CalculateStutterEfficiency( |
| unsigned int CompressedBufferSizeInkByte, |
| enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], |
| bool UnboundedRequestEnabled, |
| unsigned int MetaFIFOSizeInKEntries, |
| unsigned int ZeroSizeBufferEntries, |
| unsigned int PixelChunkSizeInKByte, |
| unsigned int NumberOfActiveSurfaces, |
| unsigned int ROBBufferSizeInKByte, |
| double TotalDataReadBandwidth, |
| double DCFCLK, |
| double ReturnBW, |
| unsigned int CompbufReservedSpace64B, |
| unsigned int CompbufReservedSpaceZs, |
| double SRExitTime, |
| double SRExitZ8Time, |
| bool SynchronizeTimingsFinal, |
| unsigned int BlendingAndTiming[], |
| double StutterEnterPlusExitWatermark, |
| double Z8StutterEnterPlusExitWatermark, |
| bool ProgressiveToInterlaceUnitInOPP, |
| bool Interlace[], |
| double MinTTUVBlank[], |
| unsigned int DPPPerSurface[], |
| unsigned int DETBufferSizeY[], |
| unsigned int BytePerPixelY[], |
| double BytePerPixelDETY[], |
| double SwathWidthY[], |
| unsigned int SwathHeightY[], |
| unsigned int SwathHeightC[], |
| double NetDCCRateLuma[], |
| double NetDCCRateChroma[], |
| double DCCFractionOfZeroSizeRequestsLuma[], |
| double DCCFractionOfZeroSizeRequestsChroma[], |
| unsigned int HTotal[], |
| unsigned int VTotal[], |
| double PixelClock[], |
| double VRatio[], |
| enum dm_rotation_angle SourceRotation[], |
| unsigned int BlockHeight256BytesY[], |
| unsigned int BlockWidth256BytesY[], |
| unsigned int BlockHeight256BytesC[], |
| unsigned int BlockWidth256BytesC[], |
| unsigned int DCCYMaxUncompressedBlock[], |
| unsigned int DCCCMaxUncompressedBlock[], |
| unsigned int VActive[], |
| bool DCCEnable[], |
| bool WritebackEnable[], |
| double ReadBandwidthSurfaceLuma[], |
| double ReadBandwidthSurfaceChroma[], |
| double meta_row_bw[], |
| double dpte_row_bw[], |
| |
| /* Output */ |
| double *StutterEfficiencyNotIncludingVBlank, |
| double *StutterEfficiency, |
| unsigned int *NumberOfStutterBurstsPerFrame, |
| double *Z8StutterEfficiencyNotIncludingVBlank, |
| double *Z8StutterEfficiency, |
| unsigned int *Z8NumberOfStutterBurstsPerFrame, |
| double *StutterPeriod, |
| bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE) |
| { |
| |
| bool FoundCriticalSurface = false; |
| unsigned int SwathSizeCriticalSurface = 0; |
| unsigned int LastChunkOfSwathSize; |
| unsigned int MissingPartOfLastSwathOfDETSize; |
| double LastZ8StutterPeriod = 0.0; |
| double LastStutterPeriod = 0.0; |
| unsigned int TotalNumberOfActiveOTG = 0; |
| double doublePixelClock; |
| unsigned int doubleHTotal; |
| unsigned int doubleVTotal; |
| bool SameTiming = true; |
| double DETBufferingTimeY; |
| double SwathWidthYCriticalSurface = 0.0; |
| double SwathHeightYCriticalSurface = 0.0; |
| double VActiveTimeCriticalSurface = 0.0; |
| double FrameTimeCriticalSurface = 0.0; |
| unsigned int BytePerPixelYCriticalSurface = 0; |
| double LinesToFinishSwathTransferStutterCriticalSurface = 0.0; |
| unsigned int DETBufferSizeYCriticalSurface = 0; |
| double MinTTUVBlankCriticalSurface = 0.0; |
| unsigned int BlockWidth256BytesYCriticalSurface = 0; |
| bool doublePlaneCriticalSurface = 0; |
| bool doublePipeCriticalSurface = 0; |
| double TotalCompressedReadBandwidth; |
| double TotalRowReadBandwidth; |
| double AverageDCCCompressionRate; |
| double EffectiveCompressedBufferSize; |
| double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer; |
| double StutterBurstTime; |
| unsigned int TotalActiveWriteback; |
| double LinesInDETY; |
| double LinesInDETYRoundedDownToSwath; |
| double MaximumEffectiveCompressionLuma; |
| double MaximumEffectiveCompressionChroma; |
| double TotalZeroSizeRequestReadBandwidth; |
| double TotalZeroSizeCompressedReadBandwidth; |
| double AverageDCCZeroSizeFraction; |
| double AverageZeroSizeCompressionRate; |
| unsigned int k; |
| |
| TotalZeroSizeRequestReadBandwidth = 0; |
| TotalZeroSizeCompressedReadBandwidth = 0; |
| TotalRowReadBandwidth = 0; |
| TotalCompressedReadBandwidth = 0; |
| |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { |
| if (DCCEnable[k] == true) { |
| if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k]) |
| || (!IsVertical(SourceRotation[k]) |
| && BlockHeight256BytesY[k] > SwathHeightY[k]) |
| || DCCYMaxUncompressedBlock[k] < 256) { |
| MaximumEffectiveCompressionLuma = 2; |
| } else { |
| MaximumEffectiveCompressionLuma = 4; |
| } |
| TotalCompressedReadBandwidth = TotalCompressedReadBandwidth |
| + ReadBandwidthSurfaceLuma[k] |
| / dml_min(NetDCCRateLuma[k], |
| MaximumEffectiveCompressionLuma); |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n", |
| __func__, k, ReadBandwidthSurfaceLuma[k]); |
| dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n", |
| __func__, k, NetDCCRateLuma[k]); |
| dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n", |
| __func__, k, MaximumEffectiveCompressionLuma); |
| #endif |
| TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth |
| + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]; |
| TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth |
| + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] |
| / MaximumEffectiveCompressionLuma; |
| |
| if (ReadBandwidthSurfaceChroma[k] > 0) { |
| if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k]) |
| || (!IsVertical(SourceRotation[k]) |
| && BlockHeight256BytesC[k] > SwathHeightC[k]) |
| || DCCCMaxUncompressedBlock[k] < 256) { |
| MaximumEffectiveCompressionChroma = 2; |
| } else { |
| MaximumEffectiveCompressionChroma = 4; |
| } |
| TotalCompressedReadBandwidth = |
| TotalCompressedReadBandwidth |
| + ReadBandwidthSurfaceChroma[k] |
| / dml_min(NetDCCRateChroma[k], |
| MaximumEffectiveCompressionChroma); |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n", |
| __func__, k, ReadBandwidthSurfaceChroma[k]); |
| dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n", |
| __func__, k, NetDCCRateChroma[k]); |
| dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n", |
| __func__, k, MaximumEffectiveCompressionChroma); |
| #endif |
| TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth |
| + ReadBandwidthSurfaceChroma[k] |
| * DCCFractionOfZeroSizeRequestsChroma[k]; |
| TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth |
| + ReadBandwidthSurfaceChroma[k] |
| * DCCFractionOfZeroSizeRequestsChroma[k] |
| / MaximumEffectiveCompressionChroma; |
| } |
| } else { |
| TotalCompressedReadBandwidth = TotalCompressedReadBandwidth |
| + ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k]; |
| } |
| TotalRowReadBandwidth = TotalRowReadBandwidth |
| + DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]); |
| } |
| } |
| |
| AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth; |
| AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth; |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled); |
| dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth); |
| dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth); |
| dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", |
| __func__, TotalZeroSizeCompressedReadBandwidth); |
| dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma); |
| dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma); |
| dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); |
| dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction); |
| dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B); |
| dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs); |
| dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte); |
| #endif |
| if (AverageDCCZeroSizeFraction == 1) { |
| AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth |
| / TotalZeroSizeCompressedReadBandwidth; |
| EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64 |
| * AverageZeroSizeCompressionRate |
| + ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 |
| * AverageZeroSizeCompressionRate; |
| } else if (AverageDCCZeroSizeFraction > 0) { |
| AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth |
| / TotalZeroSizeCompressedReadBandwidth; |
| EffectiveCompressedBufferSize = dml_min( |
| (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, |
| (double) MetaFIFOSizeInKEntries * 1024 * 64 |
| / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate |
| + 1 / AverageDCCCompressionRate)) |
| + dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64) |
| * AverageDCCCompressionRate, |
| ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 |
| / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: min 1 = %f\n", __func__, |
| CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); |
| dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 / |
| (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / |
| AverageDCCCompressionRate)); |
| dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 - |
| CompbufReservedSpace64B * 64) * AverageDCCCompressionRate); |
| dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 / |
| (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); |
| #endif |
| } else { |
| EffectiveCompressedBufferSize = dml_min( |
| (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, |
| (double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) |
| + ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64) |
| * AverageDCCCompressionRate; |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: min 1 = %f\n", __func__, |
| CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); |
| dml_print("DML::%s: min 2 = %f\n", __func__, |
| MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate); |
| #endif |
| } |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries); |
| dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate); |
| dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); |
| #endif |
| |
| *StutterPeriod = 0; |
| |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { |
| LinesInDETY = ((double) DETBufferSizeY[k] |
| + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) |
| * ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth) |
| / BytePerPixelDETY[k] / SwathWidthY[k]; |
| LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]); |
| DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k]) |
| / VRatio[k]; |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); |
| dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); |
| dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]); |
| dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n", |
| __func__, k, ReadBandwidthSurfaceLuma[k]); |
| dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth); |
| dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY); |
| dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n", |
| __func__, k, LinesInDETYRoundedDownToSwath); |
| dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]); |
| dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]); |
| dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]); |
| dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY); |
| dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]); |
| #endif |
| |
| if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) { |
| bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP; |
| |
| FoundCriticalSurface = true; |
| *StutterPeriod = DETBufferingTimeY; |
| FrameTimeCriticalSurface = ( |
| isInterlaceTiming ? |
| dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k]) |
| * (double) HTotal[k] / PixelClock[k]; |
| VActiveTimeCriticalSurface = ( |
| isInterlaceTiming ? |
| dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k]) |
| * (double) HTotal[k] / PixelClock[k]; |
| BytePerPixelYCriticalSurface = BytePerPixelY[k]; |
| SwathWidthYCriticalSurface = SwathWidthY[k]; |
| SwathHeightYCriticalSurface = SwathHeightY[k]; |
| BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k]; |
| LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k] |
| - (LinesInDETY - LinesInDETYRoundedDownToSwath); |
| DETBufferSizeYCriticalSurface = DETBufferSizeY[k]; |
| MinTTUVBlankCriticalSurface = MinTTUVBlank[k]; |
| doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0); |
| doublePipeCriticalSurface = (DPPPerSurface[k] == 1); |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: k=%0d, FoundCriticalSurface = %d\n", |
| __func__, k, FoundCriticalSurface); |
| dml_print("DML::%s: k=%0d, StutterPeriod = %f\n", |
| __func__, k, *StutterPeriod); |
| dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface = %f\n", |
| __func__, k, MinTTUVBlankCriticalSurface); |
| dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface = %f\n", |
| __func__, k, FrameTimeCriticalSurface); |
| dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface = %f\n", |
| __func__, k, VActiveTimeCriticalSurface); |
| dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface = %d\n", |
| __func__, k, BytePerPixelYCriticalSurface); |
| dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface = %f\n", |
| __func__, k, SwathWidthYCriticalSurface); |
| dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface = %f\n", |
| __func__, k, SwathHeightYCriticalSurface); |
| dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface = %d\n", |
| __func__, k, BlockWidth256BytesYCriticalSurface); |
| dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface = %d\n", |
| __func__, k, doublePlaneCriticalSurface); |
| dml_print("DML::%s: k=%0d, doublePipeCriticalSurface = %d\n", |
| __func__, k, doublePipeCriticalSurface); |
| dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n", |
| __func__, k, LinesToFinishSwathTransferStutterCriticalSurface); |
| #endif |
| } |
| } |
| } |
| |
| PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, |
| EffectiveCompressedBufferSize); |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte); |
| dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); |
| dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", |
| __func__, *StutterPeriod * TotalDataReadBandwidth); |
| dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); |
| dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, |
| PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer); |
| dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); |
| dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth); |
| dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth); |
| dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); |
| #endif |
| |
| StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate |
| / ReturnBW |
| + (*StutterPeriod * TotalDataReadBandwidth |
| - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64) |
| + *StutterPeriod * TotalRowReadBandwidth / ReturnBW; |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / |
| AverageDCCCompressionRate / ReturnBW); |
| dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", |
| __func__, (*StutterPeriod * TotalDataReadBandwidth)); |
| dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - |
| PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)); |
| dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW); |
| dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); |
| #endif |
| StutterBurstTime = dml_max(StutterBurstTime, |
| LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface |
| * SwathWidthYCriticalSurface / ReturnBW); |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: Time to finish residue swath=%f\n", |
| __func__, |
| LinesToFinishSwathTransferStutterCriticalSurface * |
| BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW); |
| #endif |
| |
| TotalActiveWriteback = 0; |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| if (WritebackEnable[k]) |
| TotalActiveWriteback = TotalActiveWriteback + 1; |
| } |
| |
| if (TotalActiveWriteback == 0) { |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime); |
| dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time); |
| dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime); |
| dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); |
| #endif |
| *StutterEfficiencyNotIncludingVBlank = dml_max(0., |
| 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100; |
| *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., |
| 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100; |
| *NumberOfStutterBurstsPerFrame = ( |
| *StutterEfficiencyNotIncludingVBlank > 0 ? |
| dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0); |
| *Z8NumberOfStutterBurstsPerFrame = ( |
| *Z8StutterEfficiencyNotIncludingVBlank > 0 ? |
| dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0); |
| } else { |
| *StutterEfficiencyNotIncludingVBlank = 0.; |
| *Z8StutterEfficiencyNotIncludingVBlank = 0.; |
| *NumberOfStutterBurstsPerFrame = 0; |
| *Z8NumberOfStutterBurstsPerFrame = 0; |
| } |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface); |
| dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", |
| __func__, *StutterEfficiencyNotIncludingVBlank); |
| dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", |
| __func__, *Z8StutterEfficiencyNotIncludingVBlank); |
| dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame); |
| dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); |
| #endif |
| |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { |
| if (BlendingAndTiming[k] == k) { |
| if (TotalNumberOfActiveOTG == 0) { |
| doublePixelClock = PixelClock[k]; |
| doubleHTotal = HTotal[k]; |
| doubleVTotal = VTotal[k]; |
| } else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k] |
| || doubleVTotal != VTotal[k]) { |
| SameTiming = false; |
| } |
| TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; |
| } |
| } |
| } |
| |
| if (*StutterEfficiencyNotIncludingVBlank > 0) { |
| LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; |
| |
| if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming |
| && LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) { |
| *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime |
| + StutterBurstTime * VActiveTimeCriticalSurface |
| / *StutterPeriod) / FrameTimeCriticalSurface) * 100; |
| } else { |
| *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank; |
| } |
| } else { |
| *StutterEfficiency = 0; |
| } |
| |
| if (*Z8StutterEfficiencyNotIncludingVBlank > 0) { |
| LastZ8StutterPeriod = VActiveTimeCriticalSurface |
| - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; |
| if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod + |
| MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) { |
| *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime |
| * VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100; |
| } else { |
| *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank; |
| } |
| } else { |
| *Z8StutterEfficiency = 0.; |
| } |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod); |
| dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark); |
| dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); |
| dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); |
| dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency); |
| dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency); |
| dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", |
| __func__, *StutterEfficiencyNotIncludingVBlank); |
| dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); |
| #endif |
| |
| SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface |
| * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface); |
| LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024); |
| MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface) |
| - DETBufferSizeYCriticalSurface; |
| |
| *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1) |
| && doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0) |
| && (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0) |
| && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize)); |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface); |
| dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize); |
| dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize); |
| dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); |
| #endif |
| } // CalculateStutterEfficiency |
| |
| void dml32_CalculateMaxDETAndMinCompressedBufferSize( |
| unsigned int ConfigReturnBufferSizeInKByte, |
| unsigned int ROBBufferSizeInKByte, |
| unsigned int MaxNumDPP, |
| bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size |
| unsigned int nomDETInKByteOverrideValue, // VBA_DELTA |
| |
| /* Output */ |
| unsigned int *MaxTotalDETInKByte, |
| unsigned int *nomDETInKByte, |
| unsigned int *MinCompressedBufferSizeInKByte) |
| { |
| bool det_buff_size_override_en = nomDETInKByteOverrideEnable; |
| unsigned int det_buff_size_override_val = nomDETInKByteOverrideValue; |
| |
| *MaxTotalDETInKByte = dml_ceil(((double)ConfigReturnBufferSizeInKByte + |
| (double) ROBBufferSizeInKByte) * 4.0 / 5.0, 64); |
| *nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64); |
| *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte; |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte); |
| dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte); |
| dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP); |
| dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte); |
| dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte); |
| dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte); |
| #endif |
| |
| if (det_buff_size_override_en) { |
| *nomDETInKByte = det_buff_size_override_val; |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte); |
| #endif |
| } |
| } // CalculateMaxDETAndMinCompressedBufferSize |
| |
| bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces, |
| double ReturnBW, |
| bool NotUrgentLatencyHiding[], |
| double ReadBandwidthLuma[], |
| double ReadBandwidthChroma[], |
| double cursor_bw[], |
| double meta_row_bandwidth[], |
| double dpte_row_bandwidth[], |
| unsigned int NumberOfDPP[], |
| double UrgentBurstFactorLuma[], |
| double UrgentBurstFactorChroma[], |
| double UrgentBurstFactorCursor[]) |
| { |
| unsigned int k; |
| bool NotEnoughUrgentLatencyHiding = false; |
| bool CalculateVActiveBandwithSupport_val = false; |
| double VActiveBandwith = 0; |
| |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| if (NotUrgentLatencyHiding[k]) { |
| NotEnoughUrgentLatencyHiding = true; |
| } |
| } |
| |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| VActiveBandwith = VActiveBandwith + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * meta_row_bandwidth[k] + NumberOfDPP[k] * dpte_row_bandwidth[k]; |
| } |
| |
| CalculateVActiveBandwithSupport_val = (VActiveBandwith <= ReturnBW) && !NotEnoughUrgentLatencyHiding; |
| |
| #ifdef __DML_VBA_DEBUG__ |
| dml_print("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, NotEnoughUrgentLatencyHiding); |
| dml_print("DML::%s: VActiveBandwith = %f\n", __func__, VActiveBandwith); |
| dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); |
| dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, CalculateVActiveBandwithSupport_val); |
| #endif |
| return CalculateVActiveBandwithSupport_val; |
| } |
| |
| void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces, |
| double ReturnBW, |
| bool NotUrgentLatencyHiding[], |
| double ReadBandwidthLuma[], |
| double ReadBandwidthChroma[], |
| double PrefetchBandwidthLuma[], |
| double PrefetchBandwidthChroma[], |
| double cursor_bw[], |
| double meta_row_bandwidth[], |
| double dpte_row_bandwidth[], |
| double cursor_bw_pre[], |
| double prefetch_vmrow_bw[], |
| unsigned int NumberOfDPP[], |
| double UrgentBurstFactorLuma[], |
| double UrgentBurstFactorChroma[], |
| double UrgentBurstFactorCursor[], |
| double UrgentBurstFactorLumaPre[], |
| double UrgentBurstFactorChromaPre[], |
| double UrgentBurstFactorCursorPre[], |
| double PrefetchBW[], |
| double VRatio[], |
| double MaxVRatioPre, |
| |
| /* output */ |
| double *MaxPrefetchBandwidth, |
| double *FractionOfUrgentBandwidth, |
| bool *PrefetchBandwidthSupport) |
| { |
| unsigned int k; |
| double ActiveBandwidthPerSurface; |
| bool NotEnoughUrgentLatencyHiding = false; |
| double TotalActiveBandwidth = 0; |
| double TotalPrefetchBandwidth = 0; |
| |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| if (NotUrgentLatencyHiding[k]) { |
| NotEnoughUrgentLatencyHiding = true; |
| } |
| } |
| |
| *MaxPrefetchBandwidth = 0; |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| ActiveBandwidthPerSurface = ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]); |
| |
| TotalActiveBandwidth += ActiveBandwidthPerSurface; |
| |
| TotalPrefetchBandwidth = TotalPrefetchBandwidth + PrefetchBW[k] * VRatio[k]; |
| |
| *MaxPrefetchBandwidth = *MaxPrefetchBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], |
| ActiveBandwidthPerSurface, |
| NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); |
| } |
| |
| if (MaxVRatioPre == __DML_MAX_VRATIO_PRE__) |
| *PrefetchBandwidthSupport = (*MaxPrefetchBandwidth <= ReturnBW) && (TotalPrefetchBandwidth <= TotalActiveBandwidth * __DML_MAX_BW_RATIO_PRE__) && !NotEnoughUrgentLatencyHiding; |
| else |
| *PrefetchBandwidthSupport = (*MaxPrefetchBandwidth <= ReturnBW) && !NotEnoughUrgentLatencyHiding; |
| |
| *FractionOfUrgentBandwidth = *MaxPrefetchBandwidth / ReturnBW; |
| } |
| |
| double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces, |
| double ReturnBW, |
| double ReadBandwidthLuma[], |
| double ReadBandwidthChroma[], |
| double PrefetchBandwidthLuma[], |
| double PrefetchBandwidthChroma[], |
| double cursor_bw[], |
| double cursor_bw_pre[], |
| unsigned int NumberOfDPP[], |
| double UrgentBurstFactorLuma[], |
| double UrgentBurstFactorChroma[], |
| double UrgentBurstFactorCursor[], |
| double UrgentBurstFactorLumaPre[], |
| double UrgentBurstFactorChromaPre[], |
| double UrgentBurstFactorCursorPre[]) |
| { |
| unsigned int k; |
| double CalculateBandwidthAvailableForImmediateFlip_val = ReturnBW; |
| |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| CalculateBandwidthAvailableForImmediateFlip_val = CalculateBandwidthAvailableForImmediateFlip_val - dml_max(ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], |
| NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); |
| } |
| |
| return CalculateBandwidthAvailableForImmediateFlip_val; |
| } |
| |
| void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces, |
| double ReturnBW, |
| enum immediate_flip_requirement ImmediateFlipRequirement[], |
| double final_flip_bw[], |
| double ReadBandwidthLuma[], |
| double ReadBandwidthChroma[], |
| double PrefetchBandwidthLuma[], |
| double PrefetchBandwidthChroma[], |
| double cursor_bw[], |
| double meta_row_bandwidth[], |
| double dpte_row_bandwidth[], |
| double cursor_bw_pre[], |
| double prefetch_vmrow_bw[], |
| unsigned int NumberOfDPP[], |
| double UrgentBurstFactorLuma[], |
| double UrgentBurstFactorChroma[], |
| double UrgentBurstFactorCursor[], |
| double UrgentBurstFactorLumaPre[], |
| double UrgentBurstFactorChromaPre[], |
| double UrgentBurstFactorCursorPre[], |
| |
| /* output */ |
| double *TotalBandwidth, |
| double *FractionOfUrgentBandwidth, |
| bool *ImmediateFlipBandwidthSupport) |
| { |
| unsigned int k; |
| *TotalBandwidth = 0; |
| for (k = 0; k < NumberOfActiveSurfaces; ++k) { |
| if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) { |
| *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], |
| NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], |
| NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); |
| } else { |
| *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], |
| NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], |
| NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); |
| } |
| } |
| *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW); |
| *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW; |
| } |
| |
| bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces, |
| double ReturnBW, |
| double UrgentLatency, |
| unsigned int SwathHeightY[], |
| unsigned int SwathHeightC[], |
| unsigned int SwathWidthY[], |
| unsigned int SwathWidthC[], |
| double BytePerPixelInDETY[], |
| double BytePerPixelInDETC[], |
| unsigned int DETBufferSizeY[], |
| unsigned int DETBufferSizeC[], |
| unsigned int NumOfDPP[], |
| unsigned int HTotal[], |
| double PixelClock[], |
| double VRatioY[], |
| double VRatioC[], |
| enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[], |
| enum unbounded_requesting_policy UseUnboundedRequesting) |
| { |
| int k; |
| double SwathSizeAllSurfaces = 0; |
| double SwathSizeAllSurfacesInFetchTimeUs; |
| double DETSwathLatencyHidingUs; |
| double DETSwathLatencyHidingYUs; |
| double DETSwathLatencyHidingCUs; |
| double SwathSizePerSurfaceY[DC__NUM_DPP__MAX]; |
| double SwathSizePerSurfaceC[DC__NUM_DPP__MAX]; |
| bool NotEnoughDETSwathFillLatencyHiding = false; |
| |
| if (UseUnboundedRequesting == dm_unbounded_requesting) |
| return false; |
| |
| /* calculate sum of single swath size for all pipes in bytes */ |
| for (k = 0; k < NumberOfActiveSurfaces; k++) { |
| SwathSizePerSurfaceY[k] = SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k]; |
| |
| if (SwathHeightC[k] != 0) |
| SwathSizePerSurfaceC[k] = SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k]; |
| else |
| SwathSizePerSurfaceC[k] = 0; |
| |
| SwathSizeAllSurfaces += SwathSizePerSurfaceY[k] + SwathSizePerSurfaceC[k]; |
| } |
| |
| SwathSizeAllSurfacesInFetchTimeUs = SwathSizeAllSurfaces / ReturnBW + UrgentLatency; |
| |
| /* ensure all DET - 1 swath can hide a fetch for all surfaces */ |
| for (k = 0; k < NumberOfActiveSurfaces; k++) { |
| double LineTime = HTotal[k] / PixelClock[k]; |
| |
| /* only care if surface is not phantom */ |
| if (UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { |
| DETSwathLatencyHidingYUs = (dml_floor(DETBufferSizeY[k] / BytePerPixelInDETY[k] / SwathWidthY[k], 1.0) - SwathHeightY[k]) / VRatioY[k] * LineTime; |
| |
| if (SwathHeightC[k] != 0) { |
| DETSwathLatencyHidingCUs = (dml_floor(DETBufferSizeC[k] / BytePerPixelInDETC[k] / SwathWidthC[k], 1.0) - SwathHeightC[k]) / VRatioC[k] * LineTime; |
| |
| DETSwathLatencyHidingUs = dml_min(DETSwathLatencyHidingYUs, DETSwathLatencyHidingCUs); |
| } else { |
| DETSwathLatencyHidingUs = DETSwathLatencyHidingYUs; |
| } |
| |
| /* DET must be able to hide time to fetch 1 swath for each surface */ |
| if (DETSwathLatencyHidingUs < SwathSizeAllSurfacesInFetchTimeUs) { |
| NotEnoughDETSwathFillLatencyHiding = true; |
| break; |
| } |
| } |
| } |
| |
| return NotEnoughDETSwathFillLatencyHiding; |
| } |