From 6b42051e452f31035993c4bcf006e9f332b9ae21 Mon Sep 17 00:00:00 2001 From: Dominik Demuth Date: Sun, 8 Mar 2026 12:32:30 +0100 Subject: [PATCH] Parallelization with OpenMP, 2-3x faster than before --- CMakeLists.txt | 3 +++ src/CMakeLists.txt | 2 +- src/sims.cpp | 53 ++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 53 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 22f090e..0523d12 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,6 +3,9 @@ project(RWSim VERSION 1.0) set(CMAKE_CXX_STANDARD 17) +find_package(OpenMP REQUIRED) + add_subdirectory(src) target_compile_options(rwsim PUBLIC -Werror -Wall -Wextra -Wconversion -O2) +target_link_libraries(rwsim PUBLIC OpenMP::OpenMP_CXX) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 831339b..bbfa3b5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -5,7 +5,7 @@ add_subdirectory(utils) add_subdirectory(experiments) add_library(simulation STATIC sims.cpp sims.h) -target_link_libraries(simulation PRIVATE utils experiments) +target_link_libraries(simulation PRIVATE utils experiments OpenMP::OpenMP_CXX) add_executable( rwsim diff --git a/src/sims.cpp b/src/sims.cpp index 701c285..f85773b 100644 --- a/src/sims.cpp +++ b/src/sims.cpp @@ -3,6 +3,7 @@ #include #include +#include void run_simulation( @@ -20,12 +21,56 @@ void run_simulation( experiment.setup(parameter, optional); const auto start = printStart(optional); + + const int num_threads = omp_get_max_threads(); + + // Create per-thread RNGs seeded deterministically from the main RNG + std::vector thread_rngs; + thread_rngs.reserve(num_threads); + for (int i = 0; i < num_threads; i++) { + thread_rngs.emplace_back(rng()); + } + + // Create per-thread clones of motion, distribution, and experiment + std::vector> thread_motions; + std::vector> thread_dists; + std::vector> thread_experiments; + for (int i = 0; i < num_threads; i++) { + thread_motions.push_back(motion.clone()); + 
thread_dists.push_back(dist.clone());
+        thread_experiments.push_back(experiment.clone());
+    }
+
+    int steps_done = 0;
     auto last_print_out = std::chrono::system_clock::now();
 
-    for (int mol_i = 0; mol_i < num_walker; mol_i++) {
-        auto traj = make_trajectory(motion, dist, experiment.tmax(), rng);
-        experiment.accumulate(traj, motion.getInitOmega(), num_walker);
-        last_print_out = printSteps(last_print_out, start, num_walker, mol_i);
+    #pragma omp parallel
+    {
+        const int tid = omp_get_thread_num();
+        auto& local_rng = thread_rngs[tid];
+        auto& local_motion = *thread_motions[tid];
+        auto& local_dist = *thread_dists[tid];
+        auto& local_experiment = *thread_experiments[tid];
+
+        #pragma omp for schedule(static)
+        for (int mol_i = 0; mol_i < num_walker; mol_i++) {
+            auto traj = make_trajectory(local_motion, local_dist, experiment.tmax(), local_rng);
+            local_experiment.accumulate(traj, local_motion.getInitOmega(), num_walker);
+
+            // Bump the shared counter and capture its new value in one atomic step;
+            // reading steps_done plainly while other threads update it is a data race.
+            int finished_so_far = 0;
+            #pragma omp atomic capture
+            finished_so_far = ++steps_done;
+            if (tid == 0) {  // only thread 0 reports progress and touches last_print_out
+                last_print_out = printSteps(last_print_out, start, num_walker, finished_so_far);
+            }
+        }
+    }
+
+    // Fold each thread's partial result back into the caller's experiment
+    for (int i = 0; i < num_threads; i++) {
+        experiment.merge(*thread_experiments[i]);
     }
 
     experiment.save(motion, dist);