Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
G
gpma_bfs
Manage
Activity
Members
Labels
Plan
Issues
2
Issue boards
Milestones
Wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
This is an archived project. Repository and other project resources are read-only.
Show more breadcrumbs
Recolic
gpma_bfs
Commits
f603457b
There was an error fetching the commit references. Please try again later.
Commit
f603457b
authored
5 years ago
by
Recolic Keghart
Browse files
Options
Downloads
Patches
Plain Diff
cpu serial gpma_bfs done, debug success
parent
2d7f034e
No related branches found
No related tags found
1 merge request
!3
Multidev
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
gpma_bfs.cuh
+8
-4
8 additions, 4 deletions
gpma_bfs.cuh
gpma_bfs_demo.cu
+4
-17
4 additions, 17 deletions
gpma_bfs_demo.cu
with
12 additions
and
21 deletions
gpma_bfs.cuh
+
8
−
4
View file @
f603457b
...
...
@@ -19,7 +19,7 @@ __host__ __device__ inline bool gpma_bitmap_set_return_old(SIZE_TYPE *bitmap, si
SIZE_TYPE
bit_loc
=
1
<<
(
bit_offset
%
32
);
SIZE_TYPE
bit_chunk
=
bitmap
[
bit_offset
/
32
];
bool
old
=
(
bit_chunk
&
bit_loc
);
bitmap
[
bit_offset
/
32
]
=
bit_chunk
+
bit_loc
;
bitmap
[
bit_offset
/
32
]
=
old
?
bit_chunk
:
bit_chunk
+
bit_loc
;
return
old
;
}
}
...
...
@@ -284,7 +284,7 @@ void gpma_bfs_gather_cpu(SIZE_TYPE *node_queue, SIZE_TYPE *_node_queue_len, SIZE
const
auto
&
row_end
=
row_offsets
[
node
+
1
];
for
(
auto
gather
=
row_begin
;
gather
<
row_end
;
++
gather
)
{
auto
neighbor
=
(
SIZE_TYPE
)(
keys
[
gather
]
&
COL_IDX_NONE
);
auto
isValid
=
(
neighbor
!=
COL_IDX_NONE
&&
value
[
gather
]
!=
VALUE_NONE
);
auto
isValid
=
(
neighbor
!=
COL_IDX_NONE
&&
value
s
[
gather
]
!=
VALUE_NONE
);
if
(
isValid
)
{
// TODO: add lock_guard or use atomic
edge_queue
[
edge_queue_len
]
=
neighbor
;
...
...
@@ -296,12 +296,12 @@ void gpma_bfs_gather_cpu(SIZE_TYPE *node_queue, SIZE_TYPE *_node_queue_len, SIZE
void
gpma_bfs_contract_cpu
(
SIZE_TYPE
*
edge_queue
,
SIZE_TYPE
*
_edge_queue_len
,
SIZE_TYPE
*
node_queue
,
SIZE_TYPE
*
_node_queue_len
,
SIZE_TYPE
level
,
SIZE_TYPE
*
label
,
SIZE_TYPE
*
bitmap
)
{
auto
&
node_queue_len
=
*
_node_queue_len
;
auto
&
edge_queue_len
=
*
_edge_queue_len
;
decltype
(
*
label
)
zero
=
0
;
SIZE_TYPE
zero
=
0
;
for
(
auto
i
=
0
;
i
<
edge_queue_len
;
++
i
)
{
const
auto
&
neighbor
=
edge_queue
[
i
];
// TODO: Also add a small cache here.
if
(
gpma_bitmap_set_return_old
(
bitmap
,
neighbor
))
if
(
impl
::
gpma_bitmap_set_return_old
(
bitmap
,
neighbor
))
continue
;
// this node is not new.
// auto exchanged = __atomic_compare_exchange_n(label+neighbor, &zero, level, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
// if(!exchanged)
...
...
@@ -352,19 +352,23 @@ __host__ void gpma_bfs(KEY_TYPE *keys, VALUE_TYPE *values, SIZE_TYPE *row_offset
if
(
DEV
==
GPU
)
{
gpma_bfs_gather_kernel
<
THREADS_NUM
><<<
BLOCKS_NUM
,
THREADS_NUM
>>>
(
node_queue
,
node_queue_offset
,
edge_queue
,
edge_queue_offset
,
keys
,
values
,
row_offsets
);
}
else
{
gpma_bfs_gather_cpu
(
node_queue
,
node_queue_offset
,
edge_queue
,
edge_queue_offset
,
keys
,
values
,
row_offsets
);
}
// contract
level
++
;
anyMemcpy
<
CPU
,
DEV
>
(
node_queue_offset
,
host_num
,
sizeof
(
SIZE_TYPE
));
anyMemcpy
<
DEV
,
CPU
>
(
host_num
,
edge_queue_offset
,
sizeof
(
SIZE_TYPE
));
//rlib::println("DEBUG:E:", rlib::printable_iter(native_vector<DEV, SIZE_TYPE>(edge_queue, edge_queue + *host_num)));
BLOCKS_NUM
=
CALC_BLOCKS_NUM
(
THREADS_NUM
,
host_num
[
0
]);
if
(
DEV
==
GPU
)
{
gpma_bfs_contract_kernel
<
THREADS_NUM
><<<
BLOCKS_NUM
,
THREADS_NUM
>>>
(
edge_queue
,
edge_queue_offset
,
node_queue
,
node_queue_offset
,
level
,
results
,
bitmap
);
}
else
{
gpma_bfs_contract_cpu
(
edge_queue
,
edge_queue_offset
,
node_queue
,
node_queue_offset
,
level
,
results
,
bitmap
);
}
anyMemcpy
<
DEV
,
CPU
>
(
host_num
,
node_queue_offset
,
sizeof
(
SIZE_TYPE
));
//rlib::println("DEBUG:N:", rlib::printable_iter(native_vector<DEV, SIZE_TYPE>(node_queue, node_queue + *host_num)));
if
(
0
==
host_num
[
0
])
break
;
...
...
This diff is collapsed.
Click to expand it.
gpma_bfs_demo.cu
+
4
−
17
View file @
f603457b
...
...
@@ -70,17 +70,11 @@ int main(int argc, char **argv) {
LOG_TIME
(
"before update_gpma 1"
)
update_gpma
(
gpma
,
base_keys
,
base_values
);
thrust
::
device_vector
<
SIZE_TYPE
>
bfs_result
(
node_size
);
native_vector
<
TEST_DEV
,
SIZE_TYPE
>
bfs_result
(
node_size
);
cudaDeviceSynchronize
();
LOG_TIME
(
"before first bfs"
)
{
auto
gpma_mirror
=
gpma
#if TEST_DEV == CPU
.
mirror
()
#endif
;
gpma_bfs
<
GPU
>
(
RAW_PTR
(
gpma_mirror
.
keys
),
RAW_PTR
(
gpma_mirror
.
values
),
RAW_PTR
(
gpma_mirror
.
row_offset
),
node_size
,
edge_size
,
bfs_start_node
,
RAW_PTR
(
bfs_result
));
}
LOG_TIME
(
"before first bfs"
)
gpma_bfs
<
TEST_DEV
>
(
RAW_PTR
(
gpma
.
keys
),
RAW_PTR
(
gpma
.
values
),
RAW_PTR
(
gpma
.
row_offset
),
node_size
,
edge_size
,
bfs_start_node
,
RAW_PTR
(
bfs_result
));
int
reach_nodes
=
node_size
-
thrust
::
count
(
bfs_result
.
begin
(),
bfs_result
.
end
(),
0
);
printf
(
"start from node %d, number of reachable nodes: %d
\n
"
,
bfs_start_node
,
reach_nodes
);
...
...
@@ -108,14 +102,7 @@ int main(int argc, char **argv) {
printf
(
"Graph is updated.
\n
"
);
LOG_TIME
(
"before second bfs"
)
{
auto
gpma_mirror
=
gpma
#if TEST_DEV == CPU
.
mirror
()
#endif
;
gpma_bfs
<
GPU
>
(
RAW_PTR
(
gpma_mirror
.
keys
),
RAW_PTR
(
gpma_mirror
.
values
),
RAW_PTR
(
gpma_mirror
.
row_offset
),
node_size
,
edge_size
,
bfs_start_node
,
RAW_PTR
(
bfs_result
));
}
gpma_bfs
<
TEST_DEV
>
(
RAW_PTR
(
gpma
.
keys
),
RAW_PTR
(
gpma
.
values
),
RAW_PTR
(
gpma
.
row_offset
),
node_size
,
edge_size
,
bfs_start_node
,
RAW_PTR
(
bfs_result
));
reach_nodes
=
node_size
-
thrust
::
count
(
bfs_result
.
begin
(),
bfs_result
.
end
(),
0
);
printf
(
"start from node %d, number of reachable nodes: %d
\n
"
,
bfs_start_node
,
reach_nodes
);
LOG_TIME
(
"after second bfs"
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment