Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
G
gpma_bfs
Manage
Activity
Members
Labels
Plan
Issues
2
Issue boards
Milestones
Wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
This is an archived project. Repository and other project resources are read-only.
Show more breadcrumbs
Recolic
gpma_bfs
Commits
e6fbb568
There was an error fetching the commit references. Please try again later.
Commit
e6fbb568
authored
5 years ago
by
Recolic Keghart
Browse files
Options
Downloads
Patches
Plain Diff
first stage
parent
f6e39a7a
No related branches found
No related tags found
1 merge request
!2
Bfs cpu
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
gpma_bfs.cuh
+22
-20
22 additions, 20 deletions
gpma_bfs.cuh
gpma_bfs_demo.cu
+2
-2
2 additions, 2 deletions
gpma_bfs_demo.cu
utils.cuh
+8
-0
8 additions, 0 deletions
utils.cuh
with
32 additions
and
22 deletions
gpma_bfs.cuh
+
22
−
20
View file @
e6fbb568
#pragma once
#include
"cub/cub.cuh"
#include
"utils.cuh"
#define FULL_MASK 0xffffffff
...
...
@@ -247,30 +248,31 @@ __global__ void gpma_bfs_contract_kernel(SIZE_TYPE *edge_queue, SIZE_TYPE *edge_
}
}
template
<
dev_type_t
DEV
>
__host__
void
gpma_bfs
(
KEY_TYPE
*
keys
,
VALUE_TYPE
*
values
,
SIZE_TYPE
*
row_offsets
,
SIZE_TYPE
node_size
,
SIZE_TYPE
edge_size
,
SIZE_TYPE
start_node
,
SIZE_TYPE
*
results
)
{
cuda
Memset
(
results
,
0
,
sizeof
(
SIZE_TYPE
)
*
node_size
);
any
Memset
<
DEV
>
(
results
,
0
,
sizeof
(
SIZE_TYPE
)
*
node_size
);
SIZE_TYPE
*
bitmap
;
cuda
Malloc
(
&
bitmap
,
sizeof
(
SIZE_TYPE
)
*
((
node_size
-
1
)
/
32
+
1
));
cuda
Memset
(
bitmap
,
0
,
sizeof
(
SIZE_TYPE
)
*
((
node_size
-
1
)
/
32
+
1
));
any
Malloc
<
DEV
>
((
void
**
)
&
bitmap
,
sizeof
(
SIZE_TYPE
)
*
((
node_size
-
1
)
/
32
+
1
));
any
Memset
<
DEV
>
(
bitmap
,
0
,
sizeof
(
SIZE_TYPE
)
*
((
node_size
-
1
)
/
32
+
1
));
SIZE_TYPE
*
node_queue
;
cuda
Malloc
(
&
node_queue
,
sizeof
(
SIZE_TYPE
)
*
node_size
);
any
Malloc
<
DEV
>
((
void
**
)
&
node_queue
,
sizeof
(
SIZE_TYPE
)
*
node_size
);
SIZE_TYPE
*
node_queue_offset
;
cuda
Malloc
(
&
node_queue_offset
,
sizeof
(
SIZE_TYPE
));
any
Malloc
<
DEV
>
((
void
**
)
&
node_queue_offset
,
sizeof
(
SIZE_TYPE
));
SIZE_TYPE
*
edge_queue
;
cuda
Malloc
(
&
edge_queue
,
sizeof
(
SIZE_TYPE
)
*
edge_size
);
any
Malloc
<
DEV
>
((
void
**
)
&
edge_queue
,
sizeof
(
SIZE_TYPE
)
*
edge_size
);
SIZE_TYPE
*
edge_queue_offset
;
cuda
Malloc
(
&
edge_queue_offset
,
sizeof
(
SIZE_TYPE
));
any
Malloc
<
DEV
>
((
void
**
)
&
edge_queue_offset
,
sizeof
(
SIZE_TYPE
));
// init
SIZE_TYPE
host_num
[
1
];
host_num
[
0
]
=
start_node
;
cuda
Memcpy
(
node_queue
,
host_num
,
sizeof
(
SIZE_TYPE
)
,
cudaMemcpyHostToDevice
);
any
Memcpy
<
CPU
,
DEV
>
(
node_queue
,
host_num
,
sizeof
(
SIZE_TYPE
));
host_num
[
0
]
=
1
<<
(
start_node
%
32
);
cuda
Memcpy
(
&
bitmap
[
start_node
/
32
],
host_num
,
sizeof
(
SIZE_TYPE
)
,
cudaMemcpyHostToDevice
);
any
Memcpy
<
CPU
,
DEV
>
(
&
bitmap
[
start_node
/
32
],
host_num
,
sizeof
(
SIZE_TYPE
));
host_num
[
0
]
=
1
;
cuda
Memcpy
(
node_queue_offset
,
host_num
,
sizeof
(
SIZE_TYPE
)
,
cudaMemcpyHostToDevice
);
cuda
Memcpy
(
&
results
[
start_node
],
host_num
,
sizeof
(
SIZE_TYPE
)
,
cudaMemcpyHostToDevice
);
any
Memcpy
<
CPU
,
DEV
>
(
node_queue_offset
,
host_num
,
sizeof
(
SIZE_TYPE
));
any
Memcpy
<
CPU
,
DEV
>
(
&
results
[
start_node
],
host_num
,
sizeof
(
SIZE_TYPE
));
SIZE_TYPE
level
=
1
;
const
SIZE_TYPE
THREADS_NUM
=
256
;
...
...
@@ -278,25 +280,25 @@ __host__ void gpma_bfs(KEY_TYPE *keys, VALUE_TYPE *values, SIZE_TYPE *row_offset
// gather
SIZE_TYPE
BLOCKS_NUM
=
CALC_BLOCKS_NUM
(
THREADS_NUM
,
host_num
[
0
]);
host_num
[
0
]
=
0
;
cuda
Memcpy
(
edge_queue_offset
,
host_num
,
sizeof
(
SIZE_TYPE
)
,
cudaMemcpyHostToDevice
);
any
Memcpy
<
CPU
,
DEV
>
(
edge_queue_offset
,
host_num
,
sizeof
(
SIZE_TYPE
));
gpma_bfs_gather_kernel
<
THREADS_NUM
><<<
BLOCKS_NUM
,
THREADS_NUM
>>>
(
node_queue
,
node_queue_offset
,
edge_queue
,
edge_queue_offset
,
keys
,
values
,
row_offsets
);
// contract
level
++
;
cuda
Memcpy
(
node_queue_offset
,
host_num
,
sizeof
(
SIZE_TYPE
)
,
cudaMemcpyHostToDevice
);
cuda
Memcpy
(
host_num
,
edge_queue_offset
,
sizeof
(
SIZE_TYPE
)
,
cudaMemcpyDeviceToHost
);
any
Memcpy
<
CPU
,
DEV
>
(
node_queue_offset
,
host_num
,
sizeof
(
SIZE_TYPE
));
any
Memcpy
<
DEV
,
CPU
>
(
host_num
,
edge_queue_offset
,
sizeof
(
SIZE_TYPE
));
BLOCKS_NUM
=
CALC_BLOCKS_NUM
(
THREADS_NUM
,
host_num
[
0
]);
gpma_bfs_contract_kernel
<
THREADS_NUM
><<<
BLOCKS_NUM
,
THREADS_NUM
>>>
(
edge_queue
,
edge_queue_offset
,
node_queue
,
node_queue_offset
,
level
,
results
,
bitmap
);
cuda
Memcpy
(
host_num
,
node_queue_offset
,
sizeof
(
SIZE_TYPE
)
,
cudaMemcpyDeviceToHost
);
any
Memcpy
<
DEV
,
CPU
>
(
host_num
,
node_queue_offset
,
sizeof
(
SIZE_TYPE
));
if
(
0
==
host_num
[
0
])
break
;
}
cudaFree
(
bitmap
);
cudaFree
(
node_queue
);
cudaFree
(
node_queue_offset
);
cudaFree
(
edge_queue
);
cudaFree
(
edge_queue_offset
);
anyFree
<
DEV
>
(
bitmap
);
anyFree
<
DEV
>
(
node_queue
);
anyFree
<
DEV
>
(
node_queue_offset
);
anyFree
<
DEV
>
(
edge_queue
);
anyFree
<
DEV
>
(
edge_queue_offset
);
}
This diff is collapsed.
Click to expand it.
gpma_bfs_demo.cu
+
2
−
2
View file @
e6fbb568
...
...
@@ -70,7 +70,7 @@ int main(int argc, char **argv) {
cudaDeviceSynchronize
();
LOG_TIME
(
"before first bfs"
)
gpma_bfs
(
RAW_PTR
(
gpma
.
keys
),
RAW_PTR
(
gpma
.
values
),
RAW_PTR
(
gpma
.
row_offset
),
node_size
,
edge_size
,
bfs_start_node
,
RAW_PTR
(
bfs_result
));
gpma_bfs
<
GPU
>
(
RAW_PTR
(
gpma
.
keys
),
RAW_PTR
(
gpma
.
values
),
RAW_PTR
(
gpma
.
row_offset
),
node_size
,
edge_size
,
bfs_start_node
,
RAW_PTR
(
bfs_result
));
int
reach_nodes
=
node_size
-
thrust
::
count
(
bfs_result
.
begin
(),
bfs_result
.
end
(),
0
);
printf
(
"start from node %d, number of reachable nodes: %d
\n
"
,
bfs_start_node
,
reach_nodes
);
...
...
@@ -98,7 +98,7 @@ int main(int argc, char **argv) {
printf
(
"Graph is updated.
\n
"
);
LOG_TIME
(
"before second bfs"
)
gpma_bfs
(
RAW_PTR
(
gpma
.
keys
),
RAW_PTR
(
gpma
.
values
),
RAW_PTR
(
gpma
.
row_offset
),
node_size
,
edge_size
,
bfs_start_node
,
RAW_PTR
(
bfs_result
));
gpma_bfs
<
GPU
>
(
RAW_PTR
(
gpma
.
keys
),
RAW_PTR
(
gpma
.
values
),
RAW_PTR
(
gpma
.
row_offset
),
node_size
,
edge_size
,
bfs_start_node
,
RAW_PTR
(
bfs_result
));
reach_nodes
=
node_size
-
thrust
::
count
(
bfs_result
.
begin
(),
bfs_result
.
end
(),
0
);
printf
(
"start from node %d, number of reachable nodes: %d
\n
"
,
bfs_start_node
,
reach_nodes
);
LOG_TIME
(
"after second bfs"
)
...
...
This diff is collapsed.
Click to expand it.
utils.cuh
+
8
−
0
View file @
e6fbb568
...
...
@@ -85,6 +85,14 @@ __host__ __device__ void anyFree<GPU>(void *ptr) {
cErr
(
cudaFree
(
ptr
));
}
// Fills `count` bytes starting at `dst` with the byte value `value`,
// choosing the fill routine from the compile-time device tag DEV:
//   - DEV == GPU : cudaMemset, with its return status passed to cErr
//   - otherwise  : plain memset
// NOTE(review): `dst` is presumably a device pointer when DEV is GPU and a
// host pointer otherwise; this function does not check that.
template <dev_type_t DEV>
void anyMemset(void *dst, int value, size_t count) {
    // Non-GPU path first so the CUDA call stands alone below.
    if (!(DEV == GPU)) {
        memset(dst, value, count);
        return;
    }
    cErr(cudaMemset(dst, value, count));
}
template
<
dev_type_t
DEV_SRC
,
dev_type_t
DEV_DST
>
void
anyMemcpy
(
void
*
dst
,
const
void
*
src
,
size_t
count
)
{
cudaMemcpyKind
kind
=
DEV_SRC
==
GPU
?
(
DEV_DST
==
GPU
?
cudaMemcpyDeviceToDevice
:
cudaMemcpyDeviceToHost
)
:
(
DEV_DST
==
GPU
?
cudaMemcpyHostToDevice
:
cudaMemcpyHostToHost
);
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment