Skip to content
Snippets Groups Projects
Commit f6e39a7a authored by Recolic Keghart
Browse files

Fix more missing anySync calls around cub::* functions

parent 6f5ab3d5
No related branches found
No related tags found
No related merge requests found
...@@ -160,6 +160,7 @@ __device__ void cub_sort_key_value(KEY_TYPE *keys, VALUE_TYPE *values, SIZE_TYPE ...@@ -160,6 +160,7 @@ __device__ void cub_sort_key_value(KEY_TYPE *keys, VALUE_TYPE *values, SIZE_TYPE
cErr(cub::DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, keys, tmp_keys, values, tmp_values, size)); cErr(cub::DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, keys, tmp_keys, values, tmp_values, size));
cErr(cudaMalloc(&d_temp_storage, temp_storage_bytes)); cErr(cudaMalloc(&d_temp_storage, temp_storage_bytes));
cErr(cub::DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, keys, tmp_keys, values, tmp_values, size)); cErr(cub::DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, keys, tmp_keys, values, tmp_values, size));
anySync<GPU>();
SIZE_TYPE THREADS_NUM = 128; SIZE_TYPE THREADS_NUM = 128;
SIZE_TYPE BLOCKS_NUM = CALC_BLOCKS_NUM(THREADS_NUM, size); SIZE_TYPE BLOCKS_NUM = CALC_BLOCKS_NUM(THREADS_NUM, size);
...@@ -771,7 +772,7 @@ template <dev_type_t DEV> ...@@ -771,7 +772,7 @@ template <dev_type_t DEV>
void rebalance_batch(SIZE_TYPE level, SIZE_TYPE seg_length, KEY_TYPE *keys, VALUE_TYPE *values, SIZE_TYPE *update_nodes, KEY_TYPE *update_keys, VALUE_TYPE *update_values, SIZE_TYPE update_size, SIZE_TYPE *unique_update_nodes, SIZE_TYPE *update_offset, SIZE_TYPE unique_update_size, SIZE_TYPE lower_bound, SIZE_TYPE upper_bound, SIZE_TYPE *row_offset) { void rebalance_batch(SIZE_TYPE level, SIZE_TYPE seg_length, KEY_TYPE *keys, VALUE_TYPE *values, SIZE_TYPE *update_nodes, KEY_TYPE *update_keys, VALUE_TYPE *update_values, SIZE_TYPE update_size, SIZE_TYPE *unique_update_nodes, SIZE_TYPE *update_offset, SIZE_TYPE unique_update_size, SIZE_TYPE lower_bound, SIZE_TYPE upper_bound, SIZE_TYPE *row_offset) {
// TryInsert+ is this function. // TryInsert+ is this function.
SIZE_TYPE update_width = seg_length << level; // real seg_length of this level SIZE_TYPE update_width = seg_length << level; // real seg_length of this level
if (false && update_width <= 1024) { if (update_width <= 1024) {
assert(IsPowerOfTwo(update_width)); assert(IsPowerOfTwo(update_width));
if (DEV == GPU) { if (DEV == GPU) {
// func pointer for each template // func pointer for each template
......
...@@ -100,8 +100,8 @@ void anyRunLengthEncoding(const SIZE_TYPE *inputVec, SIZE_TYPE inputLen, SIZE_TY ...@@ -100,8 +100,8 @@ void anyRunLengthEncoding(const SIZE_TYPE *inputVec, SIZE_TYPE inputLen, SIZE_TY
cErr(cub::DeviceRunLengthEncode::Encode(temp_storage, temp_storage_bytes, inputVec, outputVec, outputLenVec, outputLen, inputLen)); cErr(cub::DeviceRunLengthEncode::Encode(temp_storage, temp_storage_bytes, inputVec, outputVec, outputLenVec, outputLen, inputLen));
anySync<DEV>(); // TODO: test and remove them. anySync<DEV>(); // TODO: test and remove them.
anyMalloc<DEV>(&temp_storage, temp_storage_bytes); anyMalloc<DEV>(&temp_storage, temp_storage_bytes);
anySync<DEV>();
cErr(cub::DeviceRunLengthEncode::Encode(temp_storage, temp_storage_bytes, inputVec, outputVec, outputLenVec, outputLen, inputLen)); cErr(cub::DeviceRunLengthEncode::Encode(temp_storage, temp_storage_bytes, inputVec, outputVec, outputLenVec, outputLen, inputLen));
anySync<DEV>();
anyFree<DEV>(temp_storage); anyFree<DEV>(temp_storage);
SIZE_TYPE tmp; SIZE_TYPE tmp;
...@@ -124,10 +124,9 @@ __host__ __device__ void cudaExclusiveSum(const SIZE_TYPE *inputVec, SIZE_TYPE * ...@@ -124,10 +124,9 @@ __host__ __device__ void cudaExclusiveSum(const SIZE_TYPE *inputVec, SIZE_TYPE *
void *temp_storage = NULL; void *temp_storage = NULL;
size_t temp_storage_bytes = 0; size_t temp_storage_bytes = 0;
cErr(cub::DeviceScan::ExclusiveSum(temp_storage, temp_storage_bytes, inputVec, outputVec, len)); cErr(cub::DeviceScan::ExclusiveSum(temp_storage, temp_storage_bytes, inputVec, outputVec, len));
anySync<GPU>();
anyMalloc<GPU>(&temp_storage, temp_storage_bytes); anyMalloc<GPU>(&temp_storage, temp_storage_bytes);
anySync<GPU>();
cErr(cub::DeviceScan::ExclusiveSum(temp_storage, temp_storage_bytes, inputVec, outputVec, len)); cErr(cub::DeviceScan::ExclusiveSum(temp_storage, temp_storage_bytes, inputVec, outputVec, len));
anySync<GPU>();
anyFree<GPU>(temp_storage); anyFree<GPU>(temp_storage);
} }
template <dev_type_t DEV> template <dev_type_t DEV>
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment