Parallelization with OpenMP, 2-3x faster than before
This commit is contained in:
@@ -3,6 +3,9 @@ project(RWSim VERSION 1.0)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
find_package(OpenMP REQUIRED)
|
||||
|
||||
add_subdirectory(src)
|
||||
|
||||
target_compile_options(rwsim PUBLIC -Werror -Wall -Wextra -Wconversion -O2)
|
||||
target_link_libraries(rwsim PUBLIC OpenMP::OpenMP_CXX)
|
||||
|
||||
@@ -5,7 +5,7 @@ add_subdirectory(utils)
|
||||
add_subdirectory(experiments)
|
||||
|
||||
add_library(simulation STATIC sims.cpp sims.h)
|
||||
target_link_libraries(simulation PRIVATE utils experiments)
|
||||
target_link_libraries(simulation PRIVATE utils experiments OpenMP::OpenMP_CXX)
|
||||
|
||||
add_executable(
|
||||
rwsim
|
||||
|
||||
53
src/sims.cpp
53
src/sims.cpp
@@ -3,6 +3,7 @@
|
||||
|
||||
#include <iostream>
|
||||
#include <chrono>
|
||||
#include <omp.h>
|
||||
|
||||
|
||||
void run_simulation(
|
||||
@@ -20,12 +21,56 @@ void run_simulation(
|
||||
experiment.setup(parameter, optional);
|
||||
|
||||
const auto start = printStart(optional);
|
||||
|
||||
const int num_threads = omp_get_max_threads();
|
||||
|
||||
// Create per-thread RNGs seeded deterministically from the main RNG
|
||||
std::vector<std::mt19937_64> thread_rngs;
|
||||
thread_rngs.reserve(num_threads);
|
||||
for (int i = 0; i < num_threads; i++) {
|
||||
thread_rngs.emplace_back(rng());
|
||||
}
|
||||
|
||||
// Create per-thread clones of motion, distribution, and experiment
|
||||
std::vector<std::unique_ptr<motions::BaseMotion>> thread_motions;
|
||||
std::vector<std::unique_ptr<times::BaseDistribution>> thread_dists;
|
||||
std::vector<std::unique_ptr<Experiment>> thread_experiments;
|
||||
for (int i = 0; i < num_threads; i++) {
|
||||
thread_motions.push_back(motion.clone());
|
||||
thread_dists.push_back(dist.clone());
|
||||
thread_experiments.push_back(experiment.clone());
|
||||
}
|
||||
|
||||
int steps_done = 0;
|
||||
auto last_print_out = std::chrono::system_clock::now();
|
||||
|
||||
for (int mol_i = 0; mol_i < num_walker; mol_i++) {
|
||||
auto traj = make_trajectory(motion, dist, experiment.tmax(), rng);
|
||||
experiment.accumulate(traj, motion.getInitOmega(), num_walker);
|
||||
last_print_out = printSteps(last_print_out, start, num_walker, mol_i);
|
||||
#pragma omp parallel
|
||||
{
|
||||
const int tid = omp_get_thread_num();
|
||||
auto& local_rng = thread_rngs[tid];
|
||||
auto& local_motion = *thread_motions[tid];
|
||||
auto& local_dist = *thread_dists[tid];
|
||||
auto& local_experiment = *thread_experiments[tid];
|
||||
|
||||
#pragma omp for schedule(static)
|
||||
for (int mol_i = 0; mol_i < num_walker; mol_i++) {
|
||||
auto traj = make_trajectory(local_motion, local_dist, experiment.tmax(), local_rng);
|
||||
local_experiment.accumulate(traj, local_motion.getInitOmega(), num_walker);
|
||||
|
||||
if (tid == 0) {
|
||||
#pragma omp atomic
|
||||
steps_done++;
|
||||
last_print_out = printSteps(last_print_out, start, num_walker, steps_done);
|
||||
} else {
|
||||
#pragma omp atomic
|
||||
steps_done++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Merge per-thread results
|
||||
for (int i = 0; i < num_threads; i++) {
|
||||
experiment.merge(*thread_experiments[i]);
|
||||
}
|
||||
|
||||
experiment.save(motion, dist);
|
||||
|
||||
Reference in New Issue
Block a user