diff --git a/Hyper_tuner/testdemo/mem-leak/mem_leak.c b/Hyper_tuner/testdemo/mem-leak/mem_leak.c
new file mode 100644
index 0000000000000000000000000000000000000000..051f8bd546af7d429e393828a8948054c09f0be6
--- /dev/null
+++ b/Hyper_tuner/testdemo/mem-leak/mem_leak.c
@@ -0,0 +1,70 @@
+/*
+ * Memory-leak demo: a worker thread walks the Fibonacci sequence and
+ * allocates a fresh buffer for every term. The buffers are never freed;
+ * mem_leak_fix.c is the variant that releases them.
+ */
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+/*
+ * Allocate a zeroed 1024-element buffer and store *n0 + *n1 in element 0.
+ *
+ * Returns the buffer, or NULL when the allocation fails. The caller owns
+ * the returned memory.
+ */
+long long *mem_leak(long long *n0, long long *n1)
+{
+    long long *v = calloc(1024, sizeof *v);
+
+    if (v == NULL) {
+        return NULL;
+    }
+    /* Add as unsigned: signed overflow is undefined, unsigned wraps. */
+    *v = (long long)((unsigned long long)*n0 + (unsigned long long)*n1);
+    return v;
+}
+
+/*
+ * Thread entry point: print one sequence term per second.
+ *
+ * Every buffer returned by mem_leak() is left allocated, so memory use
+ * grows by one buffer per iteration. Returns NULL.
+ */
+void *child(void *arg)
+{
+    long long n0 = 0;
+    long long n1 = 1;
+    long long *v = NULL;
+    int n;
+
+    (void)arg;
+    for (n = 2; n > 0; n++) {
+        v = mem_leak(&n0, &n1);
+        if (v == NULL) {
+            fprintf(stderr, "calloc failed\n");
+            break;
+        }
+        n0 = n1;
+        n1 = *v;
+        /* *v is a long long, so the conversion needs the ll modifier. */
+        printf("%dth => %11lld\n", n, *v);
+        sleep(1);
+    }
+    return NULL;
+}
+
+/* Start the worker thread and wait for it to finish. */
+int main(void)
+{
+    pthread_t tid;
+
+    if (pthread_create(&tid, NULL, child, NULL) != 0) {
+        fprintf(stderr, "pthread_create failed\n");
+        return 1;
+    }
+    pthread_join(tid, NULL);
+    printf("main thread exit\n");
+    return 0;
+}
diff --git a/Hyper_tuner/testdemo/mem-leak/mem_leak_fix.c b/Hyper_tuner/testdemo/mem-leak/mem_leak_fix.c
new file mode 100644
index 0000000000000000000000000000000000000000..8a5ff749ed0eb00cbb72fe16bd60a5a93f0cb9a7
--- /dev/null
+++ b/Hyper_tuner/testdemo/mem-leak/mem_leak_fix.c
@@ -0,0 +1,69 @@
+/*
+ * Fixed variant of the memory-leak demo: a worker thread walks the
+ * Fibonacci sequence, allocating a buffer for every term and freeing it
+ * once the term has been printed.
+ */
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+/*
+ * Allocate a zeroed 1024-element buffer and store *n0 + *n1 in element 0.
+ *
+ * Returns the buffer, or NULL when the allocation fails. The caller owns
+ * the returned memory and must free() it.
+ */
+long long *mem_leak(long long *n0, long long *n1)
+{
+    long long *v = calloc(1024, sizeof *v);
+
+    if (v == NULL) {
+        return NULL;
+    }
+    /* Add as unsigned: signed overflow is undefined, unsigned wraps. */
+    *v = (long long)((unsigned long long)*n0 + (unsigned long long)*n1);
+    return v;
+}
+
+/*
+ * Thread entry point: print one sequence term per second, releasing each
+ * buffer after its value has been read. Returns NULL.
+ */
+void *child(void *arg)
+{
+    long long n0 = 0;
+    long long n1 = 1;
+    long long *v = NULL;
+    int n;
+
+    (void)arg;
+    for (n = 2; n > 0; n++) {
+        v = mem_leak(&n0, &n1);
+        if (v == NULL) {
+            fprintf(stderr, "calloc failed\n");
+            break;
+        }
+        n0 = n1;
+        n1 = *v;
+        /* Print before free(): reading *v after the free is a use-after-free. */
+        printf("%dth => %11lld\n", n, *v);
+        free(v);
+        sleep(1);
+    }
+    return NULL;
+}
+
+/* Start the worker thread and wait for it to finish. */
+int main(void)
+{
+    pthread_t tid;
+
+    if (pthread_create(&tid, NULL, child, NULL) != 0) {
+        fprintf(stderr, "pthread_create failed\n");
+        return 1;
+    }
+    pthread_join(tid, NULL);
+    printf("main thread exit\n");
+    return 0;
+}
diff --git a/Hyper_tuner/testdemo/omp-mpi/memory_bound.c b/Hyper_tuner/testdemo/omp-mpi/memory_bound.c
new file mode 100644
index 0000000000000000000000000000000000000000..e55d49d7c5f3327c07f959f56469ccf26fb05be5
--- /dev/null
+++ b/Hyper_tuner/testdemo/omp-mpi/memory_bound.c
@@ -0,0 +1,53 @@
+#include "utils.h"
+
+#define MAX_ARRAY_SIZE 409600
+#define LOOP_SIZE 1
+#define BLOCK_SIZE 4096
+
+/*
+ * For every i in [0, n), store in ret[i] the sum over j in [0, n) of
+ * pointA[i].x + pointA[j].y.
+ *
+ * pointA and ret must each hold at least n elements. The call is a no-op
+ * when either pointer is NULL.
+ */
+void MemoryBoundBench(Point *pointA, double *ret, int n)
+{
+    if (pointA == NULL || ret == NULL) {
+        return;
+    }
+
+    int i, j;
+    /* j lives outside the loop, so OpenMP would share it unless told otherwise. */
+    #pragma omp parallel for private(j)
+    for (i = 0; i < n; i++) {
+        ret[i] = 0.0;
+        for (j = 0; j < n; j++) {
+            ret[i] += pointA[i].x + pointA[j].y;
+        }
+    }
+}
+
+/* Time LOOP_SIZE runs of MemoryBoundBench over MAX_ARRAY_SIZE points. */
+int main(void)
+{
+    Point *pointA = InitPointsVector(MAX_ARRAY_SIZE);
+
+    double *ret = malloc(MAX_ARRAY_SIZE * sizeof *ret);
+    if (ret == NULL) {
+        printf("ERROR: Memory Allocate Faild!\n");
+        exit(1);
+    }
+
+    uint64_t ts_start = GetTime_ns();
+    int i;
+    for (i = 0; i < LOOP_SIZE; i++) {
+        MemoryBoundBench(pointA, ret, MAX_ARRAY_SIZE);
+    }
+    uint64_t ts_end = GetTime_ns();
+    printf("Total Cost Time = %f ms\n", (ts_end - ts_start) / 1000000.0);
+
+    free(ret);
+    free(pointA);
+    return 0;
+}
diff --git a/Hyper_tuner/testdemo/omp-mpi/memory_bound_mod.c b/Hyper_tuner/testdemo/omp-mpi/memory_bound_mod.c
new file mode 100644
index 0000000000000000000000000000000000000000..ded3886c5465171399dbac224e93760f4366b457
--- /dev/null
+++ b/Hyper_tuner/testdemo/omp-mpi/memory_bound_mod.c
@@ -0,0 +1,65 @@
+#include "utils.h"
+
+#define MAX_ARRAY_SIZE 409600
+#define LOOP_SIZE 1
+#define BLOCK_SIZE 4096
+
+/*
+ * Blocked variant of the pairwise sum: for every i in [0, n), ret[i] ends up
+ * as the sum over j in [0, n) of pointA[i].x + pointA[j].y. The j range is
+ * walked in chunks of BLOCK_SIZE elements.
+ *
+ * pointA and ret must each hold at least n elements. The call is a no-op
+ * when either pointer is NULL.
+ */
+void MemoryBoundBench_OPT(Point *pointA, double *ret, int n)
+{
+    if (pointA == NULL || ret == NULL) {
+        return;
+    }
+
+    int i, j, k;
+
+    /* Zero once up front: resetting inside the block loop discards earlier blocks. */
+    #pragma omp parallel for
+    for (i = 0; i < n; i++) {
+        ret[i] = 0.0;
+    }
+
+    for (k = 0; k < n; k += BLOCK_SIZE) {
+        /* Clamp the final block when n is not a multiple of BLOCK_SIZE. */
+        int block_end = (n - k < BLOCK_SIZE) ? n : k + BLOCK_SIZE;
+
+        /* j lives outside the loop, so OpenMP would share it unless told otherwise. */
+        #pragma omp parallel for private(j)
+        for (i = 0; i < n; i++) {
+            for (j = k; j < block_end; j++) {
+                ret[i] += pointA[i].x + pointA[j].y;
+            }
+        }
+    }
+}
+
+/* Time LOOP_SIZE runs of MemoryBoundBench_OPT over MAX_ARRAY_SIZE points. */
+int main(void)
+{
+    Point *pointA = InitPointsVector(MAX_ARRAY_SIZE);
+
+    double *ret = malloc(MAX_ARRAY_SIZE * sizeof *ret);
+    if (ret == NULL) {
+        printf("ERROR: Memory Allocate Faild!\n");
+        exit(1);
+    }
+
+    uint64_t ts_start = GetTime_ns();
+    int i;
+    for (i = 0; i < LOOP_SIZE; i++) {
+        MemoryBoundBench_OPT(pointA, ret, MAX_ARRAY_SIZE);
+    }
+    uint64_t ts_end = GetTime_ns();
+    printf("Total Cost Time = %f ms\n", (ts_end - ts_start) / 1000000.0);
+
+    free(ret);
+    free(pointA);
+    return 0;
+}
diff --git a/Hyper_tuner/testdemo/omp-mpi/ring.c b/Hyper_tuner/testdemo/omp-mpi/ring.c
new file mode 100644
index 0000000000000000000000000000000000000000..61864edddd6cfcd478281bde6faf59f964decf9b
--- /dev/null
+++ b/Hyper_tuner/testdemo/omp-mpi/ring.c
@@ -0,0 +1,48 @@
+/* Pass a token once around a ring of MPI processes, starting at rank 0. */
+/* NOTE(review): original #include targets were unreadable; confirm this list. */
+#include <mpi.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+int main(int argc, char** argv) {
+    (void)argc;
+    (void)argv;
+
+    // Initialize the MPI environment
+    MPI_Init(NULL, NULL);
+    // Find out rank, size
+    int world_rank;
+    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
+    int world_size;
+    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
+
+    int token;
+    // Receive from the lower process and send to the higher process. Take care
+    // of the special case when you are the first process to prevent deadlock
+    if (world_rank != 0) {
+        MPI_Recv(&token, 1, MPI_INT, world_rank - 1, 0, MPI_COMM_WORLD,
+                 MPI_STATUS_IGNORE);
+        printf("Process %d received token %d from process %d\n", world_rank, token,
+               world_rank - 1);
+    } else {
+        // Set the token's value if you are process 0
+        token = -1;
+    }
+
+    // sleep(100);
+    MPI_Send(&token, 1, MPI_INT, (world_rank + 1) % world_size, 0,
+             MPI_COMM_WORLD);
+    // Now process 0 can receive from the last process. This makes sure that at
+    // least one MPI_Send is initialized before all MPI_Recvs (again, to prevent
+    // deadlock)
+    if (world_rank == 0) {
+        MPI_Recv(&token, 1, MPI_INT, world_size - 1, 0, MPI_COMM_WORLD,
+                 MPI_STATUS_IGNORE);
+        // The sender is the last rank; world_rank - 1 would print -1 here
+        printf("Process %d received token %d from process %d\n", world_rank, token,
+               world_size - 1);
+    }
+    MPI_Finalize();
+    return 0;
+}
diff --git a/Hyper_tuner/testdemo/omp-mpi/utils.h b/Hyper_tuner/testdemo/omp-mpi/utils.h
new file mode 100644
index 0000000000000000000000000000000000000000..453f21ff0ec001b598f9d64a835e4c308585df11
--- /dev/null
+++ b/Hyper_tuner/testdemo/omp-mpi/utils.h
@@ -0,0 +1,141 @@
+#ifndef HPC_UTILS_H
+#define HPC_UTILS_H
+
+/* Takes effect only when defined before the first system header is included. */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+/* NOTE(review): original #include targets were unreadable; confirm this list. */
+#include <math.h>
+#include <sched.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+#define SEC2NS 1000000000
+
+/* A point in the plane. */
+typedef struct Point_t {
+    double x;
+    double y;
+} Point;
+
+/* Return the CLOCK_REALTIME reading in nanoseconds. */
+static inline uint64_t GetTime_ns(void)
+{
+    struct timespec ts;
+
+    clock_gettime(CLOCK_REALTIME, &ts);
+    /* Widen before multiplying so the product cannot overflow a narrow time_t. */
+    return (uint64_t)ts.tv_sec * SEC2NS + (uint64_t)ts.tv_nsec;
+}
+
+/*
+ * Advance the generator state and return the next value.
+ *
+ * The state is static, so the update runs in a named critical section to
+ * stay safe when called from an OpenMP parallel region.
+ */
+static inline uint64_t xorshf96(void)
+{
+    static uint64_t x = 123456789;
+    static uint64_t y = 362436069;
+    static uint64_t z = 521288629;
+    uint64_t result;
+
+    #pragma omp critical(xorshf96_state)
+    {
+        uint64_t t;
+
+        x ^= x << 16;
+        x ^= x >> 5;
+        x ^= x << 1;
+
+        t = x;
+        x = y;
+        y = z;
+
+        z = t ^ x ^ y;
+        result = z;
+    }
+    return result;
+}
+
+/* Return the next pseudo-random 64-bit value. */
+static inline uint64_t my_random(void)
+{
+    return xorshf96();
+}
+
+/* Return a pseudo-random whole number in [0, range) as a double; 0.0 when range is 0. */
+static inline double random_double(uint32_t range)
+{
+    if (range == 0) {
+        return 0.0;
+    }
+    return (my_random() % range) * (double)(1.0);
+}
+
+/*
+ * Allocate len points with pseudo-random coordinates in [0, len).
+ *
+ * Returns NULL when len is not positive. Exits the process when the
+ * allocation fails. The caller owns the returned array and frees it.
+ */
+static inline Point *InitPointsVector(int len)
+{
+    if (len <= 0) {
+        return NULL;
+    }
+
+    Point *pointsVector = malloc((size_t)len * sizeof *pointsVector);
+    if (pointsVector == NULL) {
+        printf("ERROR: Memory Allocate Failed!\n");
+        exit(1);
+    }
+    int i;
+    #pragma omp parallel for
+    for (i = 0; i < len; i++) {
+        pointsVector[i].x = random_double(len);
+        pointsVector[i].y = random_double(len);
+    }
+    return pointsVector;
+}
+
+/* Return the Euclidean distance between *pointA and *pointB. */
+static inline double ComputPointDistance(const Point *pointA, const Point *pointB)
+{
+    double horizontal = pointA->x - pointB->x;
+    double vertical = pointA->y - pointB->y;
+
+    return sqrt((horizontal * horizontal) + (vertical * vertical));
+}
+
+/*
+ * Pin the calling thread to core_id, then read the affinity mask back.
+ * Exits the process when either call fails.
+ *
+ * static inline so that including this header from several translation
+ * units does not produce duplicate definitions.
+ */
+static inline void set_thread_affinity(uint16_t core_id)
+{
+    cpu_set_t cpu_mask;
+    cpu_set_t get;
+
+    CPU_ZERO(&cpu_mask);
+    CPU_SET(core_id, &cpu_mask);
+
+    if (sched_setaffinity(0, sizeof(cpu_mask), &cpu_mask) == -1) {
+        printf("set cpu affinity faild ...\n");
+        exit(1);
+    }
+
+    if (sched_getaffinity(0, sizeof(get), &get) == -1) {
+        printf("get affinity faild...\n");
+        exit(1);
+    }
+}
+
+#endif