Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Additional changes #1778

Draft
wants to merge 22 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
e38954a
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - …
SergeyKopienko Jul 31, 2024
c1ba5c1
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - …
SergeyKopienko Jul 31, 2024
b876c50
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - …
SergeyKopienko Jul 31, 2024
fd81903
Replace calls of __future::wait() to calls of __future::deferrable_wa…
SergeyKopienko Jul 31, 2024
1841b49
Fix review comments
SergeyKopienko Jul 31, 2024
13bf823
Fix review comment: rename deferrable_wait to __deferrable_wait
SergeyKopienko Jul 31, 2024
d6e35a2
Fix review comment: restore wait() call instead of __deferrable_wait(…
SergeyKopienko Jul 31, 2024
d2d12aa
Fix review comment: restore wait() call instead of __deferrable_wait(…
SergeyKopienko Jul 31, 2024
0d97b4d
Add better explanation of the behavior enabled by ONEDPL_ALLOW_DEFERR…
akukanov Aug 1, 2024
e72f23e
Merge branch 'main' into dev/skopienko/fix_future_wait_and_throw_impl
SergeyKopienko Aug 12, 2024
a04d0b7
Merge branch 'main' into dev/skopienko/fix_future_wait_and_throw_impl
SergeyKopienko Aug 13, 2024
4d1d465
include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h - implements 3…
SergeyKopienko Aug 13, 2024
02189a8
include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h - fix review c…
SergeyKopienko Aug 13, 2024
43e003d
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - …
SergeyKopienko Aug 13, 2024
11fbad3
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - …
SergeyKopienko Aug 13, 2024
0ad7ca8
include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h - …
SergeyKopienko Aug 13, 2024
aed1fc6
Update include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h
SergeyKopienko Aug 13, 2024
b0ccfa9
documentation/library_guide/macros.rst - fix review comment
SergeyKopienko Aug 13, 2024
5cccfa3
Apply GitHUB clang format
SergeyKopienko Aug 13, 2024
4054f86
Move __wait_future_result and staff into include/oneapi/dpl/pstl/hete…
SergeyKopienko Aug 13, 2024
4e447ba
@@@
SergeyKopienko Aug 13, 2024
047e6de
@@@
SergeyKopienko Aug 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions documentation/library_guide/macros.rst
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,13 @@ Macro Description
---------------------------------- ------------------------------
``ONEDPL_ALLOW_DEFERRED_WAITING`` This macro allows waiting for completion of certain algorithms executed with
device policies to be deferred. (Disabled by default.)

When the macro evaluates to non-zero, a call to a oneDPL algorithm with
a device policy might return before the computation completes on the device.

.. Warning:: Before accessing data produced or modified by the call, waiting
for completion of all tasks in the corresponding SYCL queue is required;
otherwise, the program behavior is undefined.
---------------------------------- ------------------------------
``ONEDPL_FPGA_DEVICE`` Use this macro to build your code containing |onedpl_short| parallel
algorithms for FPGA devices. (Disabled by default.)
Expand Down
11 changes: 7 additions & 4 deletions include/oneapi/dpl/experimental/kt/single_pass_scan.h
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,8 @@ struct __lookback_submitter<__data_per_workitem, __workgroup_size, _Type, _FlagT
}
};

template <bool _Inclusive, typename _InRange, typename _OutRange, typename _BinaryOp, typename _KernelParam>
template <typename _WaitMode = oneapi::dpl::__par_backend_hetero::__async_mode, bool _Inclusive, typename _InRange,
typename _OutRange, typename _BinaryOp, typename _KernelParam>
sycl::event
__single_pass_scan(sycl::queue __queue, _InRange&& __in_rng, _OutRange&& __out_rng, _BinaryOp __binary_op, _KernelParam)
{
Expand Down Expand Up @@ -334,7 +335,7 @@ __single_pass_scan(sycl::queue __queue, _InRange&& __in_rng, _OutRange&& __out_r
// Perform a single-work group scan if the input is small
if (oneapi::dpl::__par_backend_hetero::__group_scan_fits_in_slm<_Type>(__queue, __n, __n_uniform))
{
return oneapi::dpl::__par_backend_hetero::__parallel_transform_scan_single_group(
return oneapi::dpl::__par_backend_hetero::__parallel_transform_scan_single_group<_WaitMode>(
oneapi::dpl::__internal::__device_backend_tag{},
oneapi::dpl::execution::__dpl::make_device_policy<typename _KernelParam::kernel_name>(__queue),
std::forward<_InRange>(__in_rng), std::forward<_OutRange>(__out_rng), __n,
Expand Down Expand Up @@ -411,7 +412,8 @@ inclusive_scan(sycl::queue __queue, _InRng&& __in_rng, _OutRng&& __out_rng, _Bin
auto __in_view = oneapi::dpl::__ranges::views::all(std::forward<_InRng>(__in_rng));
auto __out_view = oneapi::dpl::__ranges::views::all(std::forward<_OutRng>(__out_rng));

return __impl::__single_pass_scan<true>(__queue, std::move(__in_view), std::move(__out_view), __binary_op, __param);
return __impl::__single_pass_scan<__par_backend_hetero::__deferrable_mode>(__queue, std::move(__in_view), std::move(__out_view),
__binary_op, __param);
}

template <typename _InIterator, typename _OutIterator, typename _BinaryOp, typename _KernelParam>
Expand All @@ -426,7 +428,8 @@ inclusive_scan(sycl::queue __queue, _InIterator __in_begin, _InIterator __in_end
auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _OutIterator>();
auto __buf2 = __keep2(__out_begin, __out_begin + __n);

return __impl::__single_pass_scan<true>(__queue, __buf1.all_view(), __buf2.all_view(), __binary_op, __param);
return __impl::__single_pass_scan<__par_backend_hetero::__deferrable_mode>(__queue, __buf1.all_view(), __buf2.all_view(), __binary_op,
__param);
}

} // namespace gpu
Expand Down
29 changes: 19 additions & 10 deletions include/oneapi/dpl/internal/async_impl/async_impl_hetero.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ namespace dpl
namespace __internal
{

template <typename _BackendTag, typename _ExecutionPolicy, typename _ForwardIterator, typename _Function>
template <typename _WaitMode = oneapi::dpl::__par_backend_hetero::__async_mode, typename _BackendTag,
typename _ExecutionPolicy, typename _ForwardIterator, typename _Function>
auto
__pattern_walk1_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator __first,
_ForwardIterator __last, _Function __f)
Expand All @@ -45,10 +46,14 @@ __pattern_walk1_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _For
auto __future_obj = oneapi::dpl::__par_backend_hetero::__parallel_for(
_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec),
unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, __buf.all_view());

// Call optional wait: no wait, wait or deferrable wait.
oneapi::dpl::__par_backend_hetero::__wait_future_result<_WaitMode>{}(__future_obj);

return __future_obj;
}

template <typename _IsSync = ::std::false_type,
template <typename _WaitMode = __par_backend_hetero::__async_mode,
__par_backend_hetero::access_mode __acc_mode1 = __par_backend_hetero::access_mode::read,
__par_backend_hetero::access_mode __acc_mode2 = __par_backend_hetero::access_mode::write,
typename _BackendTag, typename _ExecutionPolicy, typename _ForwardIterator1, typename _ForwardIterator2,
Expand All @@ -66,18 +71,19 @@ __pattern_walk2_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _For
auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__acc_mode2, _ForwardIterator2>();
auto __buf2 = __keep2(__first2, __first2 + __n);

auto __future = oneapi::dpl::__par_backend_hetero::__parallel_for(
auto __future_obj = oneapi::dpl::__par_backend_hetero::__parallel_for(
_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec),
unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, __buf1.all_view(), __buf2.all_view());

if constexpr (_IsSync::value)
__future.wait();
// Call optional wait: no wait, wait or deferrable wait.
oneapi::dpl::__par_backend_hetero::__wait_future_result<_WaitMode>{}(__future_obj);

return __future.__make_future(__first2 + __n);
return __future_obj.__make_future(__first2 + __n);
}

template <typename _BackendTag, typename _ExecutionPolicy, typename _ForwardIterator1, typename _ForwardIterator2,
typename _ForwardIterator3, typename _Function>
template <typename _WaitMode = oneapi::dpl::__par_backend_hetero::__async_mode, typename _BackendTag,
typename _ExecutionPolicy, typename _ForwardIterator1, typename _ForwardIterator2, typename _ForwardIterator3,
typename _Function>
auto
__pattern_walk3_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1,
_ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __f)
Expand All @@ -95,12 +101,15 @@ __pattern_walk3_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _For
oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _ForwardIterator3>();
auto __buf3 = __keep3(__first3, __first3 + __n);

auto __future =
auto __future_obj =
oneapi::dpl::__par_backend_hetero::__parallel_for(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec),
unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n,
__buf1.all_view(), __buf2.all_view(), __buf3.all_view());

return __future.__make_future(__first3 + __n);
// Call optional wait: no wait, wait or deferrable wait.
oneapi::dpl::__par_backend_hetero::__wait_future_result<_WaitMode>{}(__future_obj);

return __future_obj.__make_future(__first3 + __n);
}

template <typename _BackendTag, typename _ExecutionPolicy, typename _ForwardIterator1, typename _ForwardIterator2,
Expand Down
20 changes: 9 additions & 11 deletions include/oneapi/dpl/internal/binary_search_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,11 +147,10 @@ lower_bound_impl(__internal::__hetero_tag<_BackendTag>, Policy&& policy, InputIt
auto result_buf = keep_result(result, result + value_size);
auto zip_vw = make_zip_view(input_buf.all_view(), value_buf.all_view(), result_buf.all_view());
const bool use_32bit_indexing = size <= std::numeric_limits<std::uint32_t>::max();
__bknd::__parallel_for(
__bknd::__parallel_for<__par_backend_hetero::__deferrable_mode>(
_BackendTag{}, ::std::forward<decltype(policy)>(policy),
custom_brick<StrictWeakOrdering, decltype(size), search_algorithm::lower_bound>{comp, size, use_32bit_indexing},
value_size, zip_vw)
.wait();
value_size, zip_vw);
return result + value_size;
}

Expand Down Expand Up @@ -179,11 +178,10 @@ upper_bound_impl(__internal::__hetero_tag<_BackendTag>, Policy&& policy, InputIt
auto result_buf = keep_result(result, result + value_size);
auto zip_vw = make_zip_view(input_buf.all_view(), value_buf.all_view(), result_buf.all_view());
const bool use_32bit_indexing = size <= std::numeric_limits<std::uint32_t>::max();
__bknd::__parallel_for(
__bknd::__parallel_for<__par_backend_hetero::__deferrable_mode>(
_BackendTag{}, std::forward<decltype(policy)>(policy),
custom_brick<StrictWeakOrdering, decltype(size), search_algorithm::upper_bound>{comp, size, use_32bit_indexing},
value_size, zip_vw)
.wait();
value_size, zip_vw);
return result + value_size;
}

Expand Down Expand Up @@ -211,11 +209,11 @@ binary_search_impl(__internal::__hetero_tag<_BackendTag>, Policy&& policy, Input
auto result_buf = keep_result(result, result + value_size);
auto zip_vw = make_zip_view(input_buf.all_view(), value_buf.all_view(), result_buf.all_view());
const bool use_32bit_indexing = size <= std::numeric_limits<std::uint32_t>::max();
__bknd::__parallel_for(_BackendTag{}, std::forward<decltype(policy)>(policy),
custom_brick<StrictWeakOrdering, decltype(size), search_algorithm::binary_search>{
comp, size, use_32bit_indexing},
value_size, zip_vw)
.wait();
__bknd::__parallel_for<__par_backend_hetero::__deferrable_mode>(
_BackendTag{}, std::forward<decltype(policy)>(policy),
custom_brick<StrictWeakOrdering, decltype(size), search_algorithm::binary_search>{comp, size,
use_32bit_indexing},
value_size, zip_vw);
return result + value_size;
}

Expand Down
Loading