OpenMP parallelization and compiler options

OpenMP parallelization and compiler options

Fellow code developers, 

I have several years of experience in code parallelization with MPI. I recently began to use OpenMP and quickly ran into a lot of problems. Right now the most troubling one is the interaction between the Intel compiler's own optimization and my hand-written OpenMP parallelization. Allow me to demonstrate the problem with a simple case:

Suppose we have two functions. Each function loops over a vector, and there is no data dependency between the two functions. Now I use OpenMP to create two threads and let each thread handle one of the two functions. In theory, the wall time of the two-thread version should be half that of the one-thread version. In my experiment, this holds only when the compiler optimization flag is set to -O0. If the flag is -O1, it no longer holds.

If anyone can offer some insight into the problem, it will be greatly appreciated.

This is the test code:

main.cpp

#include <iostream>

#include <omp.h>

#include <vector>

#include <stdio.h>

#include <chrono>

 

#include "tools.h"

 

// Number of int elements allocated for each row of the benchmark data in main().
#define N 60000000

 

using namespace std;

using namespace chrono;

 

/**
 * Replaces row `i` of `data` with its running (prefix) sum, in place.
 *
 * After the call, element j of the row holds the sum of the original
 * elements 0..j. Element 0 is left unchanged; an empty or single-element
 * row is left untouched.
 *
 * The loop bound is taken from the row itself rather than from a global
 * constant, so a row of any length is processed without reading or writing
 * past its end.
 *
 * @param i     Index of the row to process; must be a valid index into `data`.
 * @param data  Matrix whose row `i` is updated. Other rows are not accessed,
 *              so concurrent calls with distinct `i` do not touch the same
 *              elements.
 */
void func(int i, std::vector<std::vector<int> > &data) {
    std::vector<int> &row = data[i];
    const std::size_t count = row.size();

    // Each step depends on the previous element (loop-carried dependency),
    // so the iterations must run in order.
    for (std::size_t j = 1; j < count; ++j) {
        row[j] += row[j - 1];
    }
}

 

int main(int argc, char *argv[]) {

vector<vector<int> > data(24, vector<int>(N, 1));

 

string hostName, Ip;

if (GetHostInfo(hostName, Ip)) {

}

cout << "hostname: " << hostName << ", ip: " << Ip << endl;

 

auto start = system_clock::now();

 

#pragma omp parallel shared(data)

{

#pragma omp sections

{

#pragma omp section

func(0, data);

#pragma omp section

func(1, data);

#pragma omp section

func(2, data);

#pragma omp section

func(3, data);

#pragma omp section

func(4, data);

#pragma omp section

func(5, data);

#pragma omp section

func(6, data);

#pragma omp section

func(7, data);

#pragma omp section

func(8, data);

#pragma omp section

func(9, data);

#pragma omp section

func(10, data);

#pragma omp section

func(11, data);

#pragma omp section

func(12, data);

#pragma omp section

func(13, data);

#pragma omp section

func(14, data);

#pragma omp section

func(15, data);

#pragma omp section

func(16, data);

#pragma omp section

func(17, data);

#pragma omp section

func(18, data);

#pragma omp section

func(19, data);

#pragma omp section

func(20, data);

#pragma omp section

func(21, data);

#pragma omp section

func(22, data);

#pragma omp section

func(23, data);

}

}

 

auto end = system_clock::now();

auto duration = duration_cast<microseconds>(end-start);

cout << "time: " << double(duration.count()) * microseconds::period::num / microseconds::period::den << "s\n";

return 0;

}

tools.h

#include <iostream> /* cout */

#include <unistd.h>/* gethostname */

#include <netdb.h> /* struct hostent */

#include <arpa/inet.h> /* inet_ntop */

#include <stdlib.h> /* system */

 

/**
 * Looks up the local host name and a printable address for it.
 *
 * @param hostName  Receives the name reported by gethostname(). It is set as
 *                  soon as the name is known, even if resolving the address
 *                  fails afterwards; left untouched if gethostname() fails.
 * @param Ip        Receives the first address gethostbyname() returns for
 *                  that name, formatted by inet_ntop(). Left untouched on
 *                  failure.
 * @return true when both outputs were filled in; false otherwise, after a
 *         diagnostic line has been written to std::cout.
 *
 * Declared inline because the definition lives in a header; without it,
 * including the header from more than one translation unit would produce
 * duplicate definitions at link time.
 */
inline bool GetHostInfo(std::string& hostName, std::string& Ip) {
    char name[256];
    if (gethostname(name, sizeof(name)) != 0) {
        std::cout << "gethostname failed" << '\n';
        return false;
    }
    // If the name was truncated, termination is not guaranteed; force it.
    name[sizeof(name) - 1] = '\0';
    hostName = name;

    // NOTE(review): gethostbyname() is obsolescent and returns a pointer to
    // static storage; getaddrinfo() is the modern replacement.
    const struct hostent* host = gethostbyname(name);
    if (host == nullptr || host->h_addr_list == nullptr ||
        host->h_addr_list[0] == nullptr) {
        // Resolution failed or produced no address; the original code would
        // have dereferenced a null pointer here.
        std::cout << "hostname transform to ip failed" << '\n';
        return false;
    }

    // Sized for the longest textual IPv6 address so that either address
    // family reported in h_addrtype fits.
    char ipStr[INET6_ADDRSTRLEN];
    const char* ret = inet_ntop(host->h_addrtype, host->h_addr_list[0],
                                ipStr, sizeof(ipStr));
    if (ret == nullptr) {
        std::cout << "hostname transform to ip failed" << '\n';
        return false;
    }

    Ip = ipStr;
    return true;
}

/*

int main(int argc, char *argv[]) {

std::string hostName;

std::string Ip;

 

bool ret = GetHostInfo(hostName, Ip);

if (true == ret) {

std::cout << "hostname: " << hostName << std::endl;

std::cout << "Ip: " << Ip << std::endl;

}

system("cat /proc/cpuinfo | grep 'core id'");

return 0;

}

*/

 

 

1 post / 0 new
For more complete information about compiler optimizations, see our Optimization Notice.