From 667a66ee0e99f3f3263c1ef2de1b90d9244b7bd4 Mon Sep 17 00:00:00 2001 From: Pierre Chatelier Date: Mon, 22 Jul 2024 15:00:24 +0200 Subject: [PATCH] Merge pull request #3607 from chacha21:cuda_phase_interleaved Add interleaved versions of phase/cartToPolar/polarToCart #3607 This PR is for performance only (at the cost of more template code and increased GPU code size) The additional variants can help the caller skip the creation of temporary GPU mats (where memory is more likely to be a critical resource), and can even allow in-place processing. magnitude/angles/x/y are often already interleaved when dealing with DFTs. ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [X] I agree to contribute to the project under Apache 2 License. - [X] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [X] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [X] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [X] The feature is well documented and sample code can be built with the project CMake --- .../cudaarithm/include/opencv2/cudaarithm.hpp | 53 ++++ modules/cudaarithm/src/cuda/polar_cart.cu | 242 +++++++++++++++--- .../test/test_element_operations.cpp | 231 +++++++++++++++++ .../opencv2/cudev/functional/functional.hpp | 50 ++++ 4 files changed, 545 insertions(+), 31 deletions(-) diff --git a/modules/cudaarithm/include/opencv2/cudaarithm.hpp b/modules/cudaarithm/include/opencv2/cudaarithm.hpp index bb74ea18918..a16c271881e 100644 --- a/modules/cudaarithm/include/opencv2/cudaarithm.hpp +++ b/modules/cudaarithm/include/opencv2/cudaarithm.hpp @@ -433,6 +433,17 @@ CV_EXPORTS_W void magnitudeSqr(InputArray x, InputArray y, OutputArray magnitude */ CV_EXPORTS_W void phase(InputArray x, InputArray y, OutputArray angle, bool angleInDegrees = false, Stream& stream = Stream::Null()); +/** @brief Computes polar angles of complex matrix elements. + +@param xy Source matrix containing real and imaginary components ( CV_32FC2 ). +@param angle Destination matrix of angles ( CV_32FC1 ). +@param angleInDegrees Flag for angles that must be evaluated in degrees. +@param stream Stream for the asynchronous version. + +@sa phase +*/ +CV_EXPORTS_W void phase(InputArray xy, OutputArray angle, bool angleInDegrees = false, Stream& stream = Stream::Null()); + /** @brief Converts Cartesian coordinates into polar. @param x Source matrix containing real components ( CV_32FC1 ). @@ -446,6 +457,29 @@ CV_EXPORTS_W void phase(InputArray x, InputArray y, OutputArray angle, bool angl */ CV_EXPORTS_W void cartToPolar(InputArray x, InputArray y, OutputArray magnitude, OutputArray angle, bool angleInDegrees = false, Stream& stream = Stream::Null()); +/** @brief Converts Cartesian coordinates into polar. + +@param xy Source matrix containing real and imaginary components ( CV_32FC2 ). +@param magnitude Destination matrix of float magnitudes ( CV_32FC1 ). +@param angle Destination matrix of angles ( CV_32FC1 ). +@param angleInDegrees Flag for angles that must be evaluated in degrees. +@param stream Stream for the asynchronous version. + +@sa cartToPolar +*/ +CV_EXPORTS_W void cartToPolar(InputArray xy, OutputArray magnitude, OutputArray angle, bool angleInDegrees = false, Stream& stream = Stream::Null()); + +/** @brief Converts Cartesian coordinates into polar. + +@param xy Source matrix containing real and imaginary components ( CV_32FC2 ). +@param magnitudeAngle Destination matrix of float magnitudes and angles ( CV_32FC2 ). +@param angleInDegrees Flag for angles that must be evaluated in degrees. +@param stream Stream for the asynchronous version. + +@sa cartToPolar +*/ +CV_EXPORTS_W void cartToPolar(InputArray xy, OutputArray magnitudeAngle, bool angleInDegrees = false, Stream& stream = Stream::Null()); + /** @brief Converts polar coordinates into Cartesian. @param magnitude Source matrix containing magnitudes ( CV_32FC1 or CV_64FC1 ). @@ -457,6 +491,25 @@ CV_EXPORTS_W void cartToPolar(InputArray x, InputArray y, OutputArray magnitude, */ CV_EXPORTS_W void polarToCart(InputArray magnitude, InputArray angle, OutputArray x, OutputArray y, bool angleInDegrees = false, Stream& stream = Stream::Null()); +/** @brief Converts polar coordinates into Cartesian. + +@param magnitude Source matrix containing magnitudes ( CV_32FC1 or CV_64FC1 ). +@param angle Source matrix containing angles ( same type as magnitude ). +@param xy Destination matrix of real and imaginary components ( same depth as magnitude, i.e. CV_32FC2 or CV_64FC2 ). +@param angleInDegrees Flag that indicates angles in degrees. +@param stream Stream for the asynchronous version. +*/ +CV_EXPORTS_W void polarToCart(InputArray magnitude, InputArray angle, OutputArray xy, bool angleInDegrees = false, Stream& stream = Stream::Null()); + +/** @brief Converts polar coordinates into Cartesian. + +@param magnitudeAngle Source matrix containing magnitudes and angles ( CV_32FC2 or CV_64FC2 ). +@param xy Destination matrix of real and imaginary components ( same depth as source ). +@param angleInDegrees Flag that indicates angles in degrees. +@param stream Stream for the asynchronous version. +*/ +CV_EXPORTS_W void polarToCart(InputArray magnitudeAngle, OutputArray xy, bool angleInDegrees = false, Stream& stream = Stream::Null()); + //! @} cudaarithm_elem //! @addtogroup cudaarithm_core diff --git a/modules/cudaarithm/src/cuda/polar_cart.cu b/modules/cudaarithm/src/cuda/polar_cart.cu index 12980e424ff..725f5741d81 100644 --- a/modules/cudaarithm/src/cuda/polar_cart.cu +++ b/modules/cudaarithm/src/cuda/polar_cart.cu @@ -52,8 +52,10 @@ #include "opencv2/cudev.hpp" #include "opencv2/core/private.cuda.hpp" -using namespace cv; -using namespace cv::cuda; +//do not use implicit cv::cuda to avoid clash of tuples from ::cuda::std +/*using namespace cv; +using namespace cv::cuda;*/ + using namespace cv::cudev; void cv::cuda::magnitude(InputArray _x, InputArray _y, OutputArray _dst, Stream& stream) @@ -66,11 +68,7 @@ void cv::cuda::magnitude(InputArray _x, InputArray _y, OutputArray _dst, Stream& GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream); - GpuMat_ xc(x.reshape(1)); - GpuMat_ yc(y.reshape(1)); - GpuMat_ magc(dst.reshape(1)); - - gridTransformBinary(xc, yc, magc, magnitude_func(), stream); + gridTransformBinary(globPtr(x), globPtr(y), globPtr(dst), magnitude_func(), stream); syncOutput(dst, _dst, stream); } @@ -85,11 +83,7 @@ void cv::cuda::magnitudeSqr(InputArray _x, InputArray _y, OutputArray _dst, Stre GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream); - GpuMat_ xc(x.reshape(1)); - GpuMat_ yc(y.reshape(1)); - GpuMat_ magc(dst.reshape(1)); - - gridTransformBinary(xc, yc, magc, magnitude_sqr_func(), stream); + gridTransformBinary(globPtr(x), globPtr(y), globPtr(dst), magnitude_sqr_func(), stream); syncOutput(dst, _dst, stream); } @@ -104,14 +98,26 @@ void cv::cuda::phase(InputArray _x, InputArray _y, OutputArray _dst, bool angleI GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream); - GpuMat_ xc(x.reshape(1)); - GpuMat_ yc(y.reshape(1)); - GpuMat_ anglec(dst.reshape(1)); + if (angleInDegrees) + gridTransformBinary(globPtr(x), globPtr(y), globPtr(dst), direction_func(), stream); + else + gridTransformBinary(globPtr(x), globPtr(y), globPtr(dst), direction_func(), stream); + + syncOutput(dst, _dst, stream); +} + +void cv::cuda::phase(InputArray _xy, OutputArray _dst, bool angleInDegrees, Stream& stream) +{ + GpuMat xy = getInputMat(_xy, stream); + + CV_Assert( xy.type() == CV_32FC2 ); + + GpuMat dst = getOutputMat(_dst, xy.size(), CV_32FC1, stream); if (angleInDegrees) - gridTransformBinary(xc, yc, anglec, direction_func(), stream); + gridTransformUnary(globPtr(xy), globPtr(dst), direction_interleaved_func(), stream); else - gridTransformBinary(xc, yc, anglec, direction_func(), stream); + gridTransformUnary(globPtr(xy), globPtr(dst), direction_interleaved_func(), stream); syncOutput(dst, _dst, stream); } @@ -127,10 +133,10 @@ void cv::cuda::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, Outpu GpuMat mag = getOutputMat(_mag, x.size(), CV_32FC1, stream); GpuMat angle = getOutputMat(_angle, x.size(), CV_32FC1, stream); - GpuMat_ xc(x.reshape(1)); - GpuMat_ yc(y.reshape(1)); - GpuMat_ magc(mag.reshape(1)); - GpuMat_ anglec(angle.reshape(1)); + GpuMat_ xc(x); + GpuMat_ yc(y); + GpuMat_ magc(mag); + GpuMat_ anglec(angle); if (angleInDegrees) gridTransformBinary(xc, yc, magc, anglec, magnitude_func(), direction_func(), stream); @@ -141,6 +147,69 @@ void cv::cuda::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, Outpu syncOutput(angle, _angle, stream); } +void cv::cuda::cartToPolar(InputArray _xy, OutputArray _mag, OutputArray _angle, bool angleInDegrees, Stream& stream) +{ + GpuMat xy = getInputMat(_xy, stream); + + CV_Assert( xy.type() == CV_32FC2 ); + + GpuMat mag = getOutputMat(_mag, xy.size(), CV_32FC1, stream); + GpuMat angle = getOutputMat(_angle, xy.size(), CV_32FC1, stream); + + GpuMat_ magc(mag); + GpuMat_ anglec(angle); + + if (angleInDegrees) + { + auto f1 = magnitude_interleaved_func(); + auto f2 = direction_interleaved_func(); + cv::cudev::tuple f12 = cv::cudev::make_tuple(f1, f2); + gridTransformTuple(globPtr(xy), + tie(magc, anglec), + f12, + stream); + } + else + { + auto f1 = magnitude_interleaved_func(); + auto f2 = direction_interleaved_func(); + cv::cudev::tuple f12 = cv::cudev::make_tuple(f1, f2); + gridTransformTuple(globPtr(xy), + tie(magc, anglec), + f12, + stream); + } + + syncOutput(mag, _mag, stream); + syncOutput(angle, _angle, stream); +} + +void cv::cuda::cartToPolar(InputArray _xy, OutputArray _magAngle, bool angleInDegrees, Stream& stream) +{ + GpuMat xy = getInputMat(_xy, stream); + + CV_Assert( xy.type() == CV_32FC2 ); + + GpuMat magAngle = getOutputMat(_magAngle, xy.size(), CV_32FC2, stream); + + if (angleInDegrees) + { + gridTransformUnary(globPtr(xy), + globPtr(magAngle), + magnitude_direction_interleaved_func(), + stream); + } + else + { + gridTransformUnary(globPtr(xy), + globPtr(magAngle), + magnitude_direction_interleaved_func(), + stream); + } + + syncOutput(magAngle, _magAngle, stream); +} + namespace { template struct sincos_op @@ -159,12 +228,12 @@ namespace }; template - __global__ void polarToCartImpl_(const GlobPtr mag, const GlobPtr angle, GlobPtr xmat, GlobPtr ymat, const T scale, const int rows, const int cols) + __global__ void polarToCartImpl_(const PtrStep mag, const PtrStepSz angle, PtrStep xmat, PtrStep ymat, const T scale) { const int x = blockDim.x * blockIdx.x + threadIdx.x; const int y = blockDim.y * blockIdx.y + threadIdx.y; - if (x >= cols || y >= rows) + if (x >= angle.cols || y >= angle.rows) return; const T mag_val = useMag ? mag(y, x) : static_cast(1.0); @@ -178,23 +247,90 @@ namespace ymat(y, x) = mag_val * sin_a; } + template + __global__ void polarToCartDstInterleavedImpl_(const PtrStep mag, const PtrStepSz angle, PtrStep::type > xymat, const T scale) + { + typedef typename MakeVec::type T2; + const int x = blockDim.x * blockIdx.x + threadIdx.x; + const int y = blockDim.y * blockIdx.y + threadIdx.y; + + if (x >= angle.cols || y >= angle.rows) + return; + + const T mag_val = useMag ? mag(y, x) : static_cast(1.0); + const T angle_val = angle(y, x); + + T sin_a, cos_a; + sincos_op op; + op(scale * angle_val, &sin_a, &cos_a); + + const T2 xy = {mag_val * cos_a, mag_val * sin_a}; + xymat(y, x) = xy; + } + + template + __global__ void polarToCartInterleavedImpl_(const PtrStepSz::type > magAngle, PtrStep::type > xymat, const T scale) + { + typedef typename MakeVec::type T2; + const int x = blockDim.x * blockIdx.x + threadIdx.x; + const int y = blockDim.y * blockIdx.y + threadIdx.y; + + if (x >= magAngle.cols || y >= magAngle.rows) + return; + + const T2 magAngle_val = magAngle(y, x); + const T mag_val = magAngle_val.x; + const T angle_val = magAngle_val.y; + + T sin_a, cos_a; + sincos_op op; + op(scale * angle_val, &sin_a, &cos_a); + + const T2 xy = {mag_val * cos_a, mag_val * sin_a}; + xymat(y, x) = xy; + } + template void polarToCartImpl(const GpuMat& mag, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, cudaStream_t& stream) { - GpuMat_ xc(x.reshape(1)); - GpuMat_ yc(y.reshape(1)); - GpuMat_ magc(mag.reshape(1)); - GpuMat_ anglec(angle.reshape(1)); + const dim3 block(32, 8); + const dim3 grid(divUp(angle.cols, block.x), divUp(angle.rows, block.y)); + + const T scale = angleInDegrees ? static_cast(CV_PI / 180.0) : static_cast(1.0); + + if (mag.empty()) + polarToCartImpl_ << > >(mag, angle, x, y, scale); + else + polarToCartImpl_ << > >(mag, angle, x, y, scale); + } + + template + void polarToCartDstInterleavedImpl(const GpuMat& mag, const GpuMat& angle, GpuMat& xy, bool angleInDegrees, cudaStream_t& stream) + { + typedef typename MakeVec::type T2; const dim3 block(32, 8); - const dim3 grid(divUp(anglec.cols, block.x), divUp(anglec.rows, block.y)); + const dim3 grid(divUp(angle.cols, block.x), divUp(angle.rows, block.y)); const T scale = angleInDegrees ? static_cast(CV_PI / 180.0) : static_cast(1.0); - if (magc.empty()) - polarToCartImpl_ << > >(shrinkPtr(magc), shrinkPtr(anglec), shrinkPtr(xc), shrinkPtr(yc), scale, anglec.rows, anglec.cols); + if (mag.empty()) + polarToCartDstInterleavedImpl_ << > >(mag, angle, xy, scale); else - polarToCartImpl_ << > >(shrinkPtr(magc), shrinkPtr(anglec), shrinkPtr(xc), shrinkPtr(yc), scale, anglec.rows, anglec.cols); + polarToCartDstInterleavedImpl_ << > >(mag, angle, xy, scale); + } + + template + void polarToCartInterleavedImpl(const GpuMat& magAngle, GpuMat& xy, bool angleInDegrees, cudaStream_t& stream) + { + typedef typename MakeVec::type T2; + + const dim3 block(32, 8); + const dim3 grid(divUp(magAngle.cols, block.x), divUp(magAngle.rows, block.y)); + + const T scale = angleInDegrees ? static_cast(CV_PI / 180.0) : static_cast(1.0); + + polarToCartInterleavedImpl_ << > >(magAngle, xy, scale); } } @@ -223,4 +359,48 @@ void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _x, O CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() ); } +void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _xy, bool angleInDegrees, Stream& _stream) +{ + typedef void(*func_t)(const GpuMat& mag, const GpuMat& angle, GpuMat& xy, bool angleInDegrees, cudaStream_t& stream); + static const func_t funcs[7] = { 0, 0, 0, 0, 0, polarToCartDstInterleavedImpl, polarToCartDstInterleavedImpl }; + + GpuMat mag = getInputMat(_mag, _stream); + GpuMat angle = getInputMat(_angle, _stream); + + CV_Assert(angle.depth() == CV_32F || angle.depth() == CV_64F); + CV_Assert( mag.empty() || (mag.type() == angle.type() && mag.size() == angle.size()) ); + + GpuMat xy = getOutputMat(_xy, angle.size(), CV_MAKETYPE(angle.depth(), 2), _stream); + + cudaStream_t stream = StreamAccessor::getStream(_stream); + funcs[angle.depth()](mag, angle, xy, angleInDegrees, stream); + CV_CUDEV_SAFE_CALL( cudaGetLastError() ); + + syncOutput(xy, _xy, _stream); + + if (stream == 0) + CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() ); +} + +void cv::cuda::polarToCart(InputArray _magAngle, OutputArray _xy, bool angleInDegrees, Stream& _stream) +{ + typedef void(*func_t)(const GpuMat& magAngle, GpuMat& xy, bool angleInDegrees, cudaStream_t& stream); + static const func_t funcs[7] = { 0, 0, 0, 0, 0, polarToCartInterleavedImpl, polarToCartInterleavedImpl }; + + GpuMat magAngle = getInputMat(_magAngle, _stream); + + CV_Assert(magAngle.type() == CV_32FC2 || magAngle.type() == CV_64FC2); + + GpuMat xy = getOutputMat(_xy, magAngle.size(), magAngle.type(), _stream); + + cudaStream_t stream = StreamAccessor::getStream(_stream); + funcs[magAngle.depth()](magAngle, xy, angleInDegrees, stream); + CV_CUDEV_SAFE_CALL( cudaGetLastError() ); + + syncOutput(xy, _xy, _stream); + + if (stream == 0) + CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() ); +} + #endif diff --git a/modules/cudaarithm/test/test_element_operations.cpp b/modules/cudaarithm/test/test_element_operations.cpp index d2e314b10d9..a15ad7b3ee5 100644 --- a/modules/cudaarithm/test/test_element_operations.cpp +++ b/modules/cudaarithm/test/test_element_operations.cpp @@ -2765,6 +2765,47 @@ INSTANTIATE_TEST_CASE_P(CUDA_Arithm, Phase, testing::Combine( testing::Values(AngleInDegrees(false), AngleInDegrees(true)), WHOLE_SUBMAT)); +PARAM_TEST_CASE(PhaseInterleaved, cv::cuda::DeviceInfo, cv::Size, AngleInDegrees, UseRoi) +{ + cv::cuda::DeviceInfo devInfo; + cv::Size size; + bool angleInDegrees; + bool useRoi; + + virtual void SetUp() + { + devInfo = GET_PARAM(0); + size = GET_PARAM(1); + angleInDegrees = GET_PARAM(2); + useRoi = GET_PARAM(3); + + cv::cuda::setDevice(devInfo.deviceID()); + } +}; + +CUDA_TEST_P(PhaseInterleaved, Accuracy) +{ + cv::Mat x = randomMat(size, CV_32FC1); + cv::Mat y = randomMat(size, CV_32FC1); + cv::Mat xy; + std::vector xyChannels = {x, y}; + cv::merge(xyChannels, xy); + + cv::cuda::GpuMat dst = createMat(size, CV_32FC1, useRoi); + cv::cuda::phase(loadMat(xy, useRoi), dst, angleInDegrees); + + cv::Mat dst_gold; + cv::phase(x, y, dst_gold, angleInDegrees); + + EXPECT_MAT_NEAR(dst_gold, dst, angleInDegrees ? 1e-2 : 1e-3); +} + +INSTANTIATE_TEST_CASE_P(CUDA_Arithm, PhaseInterleaved, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(AngleInDegrees(false), AngleInDegrees(true)), + WHOLE_SUBMAT)); + //////////////////////////////////////////////////////////////////////////////// // CartToPolar @@ -2809,6 +2850,97 @@ INSTANTIATE_TEST_CASE_P(CUDA_Arithm, CartToPolar, testing::Combine( testing::Values(AngleInDegrees(false), AngleInDegrees(true)), WHOLE_SUBMAT)); +PARAM_TEST_CASE(CartToPolarInterleavedXY, cv::cuda::DeviceInfo, cv::Size, AngleInDegrees, UseRoi) +{ + cv::cuda::DeviceInfo devInfo; + cv::Size size; + bool angleInDegrees; + bool useRoi; + + virtual void SetUp() + { + devInfo = GET_PARAM(0); + size = GET_PARAM(1); + angleInDegrees = GET_PARAM(2); + useRoi = GET_PARAM(3); + + cv::cuda::setDevice(devInfo.deviceID()); + } +}; + +CUDA_TEST_P(CartToPolarInterleavedXY, Accuracy) +{ + cv::Mat x = randomMat(size, CV_32FC1); + cv::Mat y = randomMat(size, CV_32FC1); + cv::Mat xy; + std::vector xyChannels = {x, y}; + cv::merge(xyChannels, xy); + + cv::cuda::GpuMat mag = createMat(size, CV_32FC1, useRoi); + cv::cuda::GpuMat angle = createMat(size, CV_32FC1, useRoi); + cv::cuda::cartToPolar(loadMat(xy, useRoi), mag, angle, angleInDegrees); + + cv::Mat mag_gold; + cv::Mat angle_gold; + cv::cartToPolar(x, y, mag_gold, angle_gold, angleInDegrees); + + EXPECT_MAT_NEAR(mag_gold, mag, 1e-4); + EXPECT_MAT_NEAR(angle_gold, angle, angleInDegrees ? 1e-2 : 1e-3); +} + +INSTANTIATE_TEST_CASE_P(CUDA_Arithm, CartToPolarInterleavedXY, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(AngleInDegrees(false), AngleInDegrees(true)), + WHOLE_SUBMAT)); + +PARAM_TEST_CASE(CartToPolarInterleavedXYMagAngle, cv::cuda::DeviceInfo, cv::Size, AngleInDegrees, UseRoi) +{ + cv::cuda::DeviceInfo devInfo; + cv::Size size; + bool angleInDegrees; + bool useRoi; + + virtual void SetUp() + { + devInfo = GET_PARAM(0); + size = GET_PARAM(1); + angleInDegrees = GET_PARAM(2); + useRoi = GET_PARAM(3); + + cv::cuda::setDevice(devInfo.deviceID()); + } +}; + +CUDA_TEST_P(CartToPolarInterleavedXYMagAngle, Accuracy) +{ + cv::Mat x = randomMat(size, CV_32FC1); + cv::Mat y = randomMat(size, CV_32FC1); + cv::Mat xy; + std::vector xyChannels = {x, y}; + cv::merge(xyChannels, xy); + + cv::cuda::GpuMat magAngle = createMat(size, CV_32FC2, useRoi); + cv::cuda::cartToPolar(loadMat(xy, useRoi), magAngle, angleInDegrees); + std::vector magAngleChannels; + cv::cuda::split(magAngle, magAngleChannels); + cv::cuda::GpuMat& mag = magAngleChannels[0]; + cv::cuda::GpuMat& angle = magAngleChannels[1]; + + cv::Mat mag_gold; + cv::Mat angle_gold; + cv::cartToPolar(x, y, mag_gold, angle_gold, angleInDegrees); + + EXPECT_MAT_NEAR(mag_gold, mag, 1e-4); + EXPECT_MAT_NEAR(angle_gold, angle, angleInDegrees ? 1e-2 : 1e-3); +} + +INSTANTIATE_TEST_CASE_P(CUDA_Arithm, CartToPolarInterleavedXYMagAngle, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(AngleInDegrees(false), AngleInDegrees(true)), + WHOLE_SUBMAT)); + //////////////////////////////////////////////////////////////////////////////// // polarToCart @@ -2857,5 +2989,104 @@ INSTANTIATE_TEST_CASE_P(CUDA_Arithm, PolarToCart, testing::Combine( testing::Values(AngleInDegrees(false), AngleInDegrees(true)), WHOLE_SUBMAT)); +PARAM_TEST_CASE(PolarToCartInterleaveXY, cv::cuda::DeviceInfo, cv::Size, MatType, AngleInDegrees, UseRoi) +{ + cv::cuda::DeviceInfo devInfo; + cv::Size size; + int type; + bool angleInDegrees; + bool useRoi; + + virtual void SetUp() + { + devInfo = GET_PARAM(0); + size = GET_PARAM(1); + type = GET_PARAM(2); + angleInDegrees = GET_PARAM(3); + useRoi = GET_PARAM(4); + + cv::cuda::setDevice(devInfo.deviceID()); + } +}; + +CUDA_TEST_P(PolarToCartInterleaveXY, Accuracy) +{ + cv::Mat magnitude = randomMat(size, type); + cv::Mat angle = randomMat(size, type); + const double tol = (type == CV_32FC1 ? 1.6e-4 : 1e-4) * (angleInDegrees ? 1.0 : 19.47); + + cv::cuda::GpuMat xy = createMat(size, CV_MAKETYPE(CV_MAT_DEPTH(type), 2), useRoi); + cv::cuda::polarToCart(loadMat(magnitude, useRoi), loadMat(angle, useRoi), xy, angleInDegrees); + std::vector xyChannels; + cv::cuda::split(xy, xyChannels); + cv::cuda::GpuMat& x = xyChannels[0]; + cv::cuda::GpuMat& y = xyChannels[1]; + + cv::Mat x_gold; + cv::Mat y_gold; + cv::polarToCart(magnitude, angle, x_gold, y_gold, angleInDegrees); + + EXPECT_MAT_NEAR(x_gold, x, tol); + EXPECT_MAT_NEAR(y_gold, y, tol); +} + +INSTANTIATE_TEST_CASE_P(CUDA_Arithm, PolarToCartInterleaveXY, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(CV_32FC1, CV_64FC1), + testing::Values(AngleInDegrees(false), AngleInDegrees(true)), + WHOLE_SUBMAT)); + +PARAM_TEST_CASE(PolarToCartInterleaveMagAngleXY, cv::cuda::DeviceInfo, cv::Size, MatType, AngleInDegrees, UseRoi) +{ + cv::cuda::DeviceInfo devInfo; + cv::Size size; + int type; + bool angleInDegrees; + bool useRoi; + + virtual void SetUp() + { + devInfo = GET_PARAM(0); + size = GET_PARAM(1); + type = GET_PARAM(2); + angleInDegrees = GET_PARAM(3); + useRoi = GET_PARAM(4); + + cv::cuda::setDevice(devInfo.deviceID()); + } +}; + +CUDA_TEST_P(PolarToCartInterleaveMagAngleXY, Accuracy) +{ + cv::Mat magnitude = randomMat(size, type); + cv::Mat angle = randomMat(size, type); + std::vector magAngleChannels = {magnitude, angle}; + cv::Mat magAngle; + cv::merge(magAngleChannels, magAngle); + const double tol = (type == CV_32FC1 ? 1.6e-4 : 1e-4) * (angleInDegrees ? 1.0 : 19.47); + + cv::cuda::GpuMat xy = createMat(size, CV_MAKETYPE(CV_MAT_DEPTH(type), 2), useRoi); + cv::cuda::polarToCart(loadMat(magAngle, useRoi), xy, angleInDegrees); + std::vector xyChannels; + cv::cuda::split(xy, xyChannels); + cv::cuda::GpuMat& x = xyChannels[0]; + cv::cuda::GpuMat& y = xyChannels[1]; + + cv::Mat x_gold; + cv::Mat y_gold; + cv::polarToCart(magnitude, angle, x_gold, y_gold, angleInDegrees); + + EXPECT_MAT_NEAR(x_gold, x, tol); + EXPECT_MAT_NEAR(y_gold, y, tol); +} + +INSTANTIATE_TEST_CASE_P(CUDA_Arithm, PolarToCartInterleaveMagAngleXY, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(CV_32FC1, CV_64FC1), + testing::Values(AngleInDegrees(false), AngleInDegrees(true)), + WHOLE_SUBMAT)); + }} // namespace #endif // HAVE_CUDA diff --git a/modules/cudev/include/opencv2/cudev/functional/functional.hpp b/modules/cudev/include/opencv2/cudev/functional/functional.hpp index e4165358c28..ddf2cf809e3 100644 --- a/modules/cudev/include/opencv2/cudev/functional/functional.hpp +++ b/modules/cudev/include/opencv2/cudev/functional/functional.hpp @@ -619,6 +619,17 @@ template struct magnitude_func : binary_function struct magnitude_interleaved_func : unary_function::elem_type> +{ + typedef typename VecTraits::elem_type T; + typedef typename TypeTraits::parameter_type parameter_type; + __device__ __forceinline__ T operator ()(parameter_type ab) const + { + sqrt_func::type> f; + return f(ab.x * ab.x + ab.y * ab.y); + } +}; + template struct magnitude_sqr_func : binary_function::type> { __device__ __forceinline__ typename functional_detail::FloatType::type operator ()(typename TypeTraits::parameter_type a, typename TypeTraits::parameter_type b) const @@ -627,6 +638,16 @@ template struct magnitude_sqr_func : binary_function struct magnitude_sqr_interleaved_func : unary_function::elem_type> +{ + typedef typename VecTraits::elem_type T; + typedef typename TypeTraits::parameter_type parameter_type; + __device__ __forceinline__ T operator ()(parameter_type ab) const + { + return ab.x * ab.x + ab.y * ab.y; + } +}; + template struct direction_func : binary_function { __device__ T operator ()(T x, T y) const @@ -643,6 +664,35 @@ template struct direction_func : binary_functi } }; +template struct direction_interleaved_func : unary_function::elem_type> +{ + typedef typename VecTraits::elem_type T; + __device__ T operator ()(T2 xy) const + { + atan2_func f; + typename atan2_func::result_type angle = f(xy.y, xy.x); + + angle += (angle < 0) * (2.0f * CV_PI_F); + + if (angleInDegrees) + angle *= (180.0f / CV_PI_F); + + return saturate_cast(angle); + } +}; + +template struct magnitude_direction_interleaved_func : unary_function::elem_type> +{ + typedef typename VecTraits::elem_type T; + __device__ T2 operator ()(T2 xy) const + { + const T mag = magnitude_interleaved_func()(xy); + const T angle = direction_interleaved_func()(xy); + const T2 magAngle = {saturate_cast(mag), saturate_cast(angle)}; + return magAngle; + } +}; + template struct pow_func : binary_function { __device__ __forceinline__ float operator ()(T val, float power) const