diff --git a/.gitmodules b/.gitmodules index 775e21879..8dae6fadd 100644 --- a/.gitmodules +++ b/.gitmodules @@ -38,13 +38,9 @@ [submodule "third-party/ParResKernels"] path = third-party/ParResKernels url = https://github.com/Shillaker/Kernels.git -[submodule "third-party/LULESH"] - path = third-party/LULESH - url = https://github.com/mfournial/LULESH.git [submodule "third-party/wasi-libc"] - path = third-party/wasi-libc - url = https://github.com/Shillaker/wasi-libc - + path = third-party/wasi-libc + url = https://github.com/Shillaker/wasi-libc [submodule "third-party/wamr"] path = third-party/wamr url = https://github.com/bytecodealliance/wasm-micro-runtime.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 90ed521d4..f1de5b5dc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,7 +31,7 @@ if (FAASM_WAMR_SUPPORT) set(WAMR_BUILD_AOT 0) set(WAMR_BUILD_LIBC_WASI 1) set(WAMR_BUILD_LIBC_BUILTIN 0) -endif() +endif () # Library type (for shared libraries) if (FAASM_STATIC_LIBS) @@ -51,6 +51,12 @@ else () endfunction() endif () +option(FAASM_OMP_PTS "PREALLOCATE THREAD STACK: Only available for local WasmMP and non recursive levels") +if (${FAASM_OMP_PTS}) + message("-- Activated OMP_PTS: OpenMP Thread stack preallocation feature") + add_compile_definitions(OMP_PTS) +endif () + # Switch on WAVM stack traces in debug (potential performance gain?) 
set(WAVM_ENABLE_UNWIND ON CACHE BOOL "WAVM unwind") #if(CMAKE_BUILD_TYPE MATCHES Debug) @@ -64,11 +70,14 @@ add_definitions(-DDLL_EXPORT=) add_definitions(-DDLL_IMPORT=) # Faasm profiling -add_definitions(-DFAASM_PROFILE_ON=0) +option(FAASM_PERF_PROFILING "Turn on profiling features as described in debugging.md") +option(FAASM_SELF_PROFILING "Turn on system profiling logged at tracing level") + +if (${FAASM_SELF_PROFILING}) + add_compile_definitions(PROFILE_ALL) +endif () -# Custom LLVM build (also for profiling) -set(FAASM_CUSTOM_LLVM 0) -if (${FAASM_CUSTOM_LLVM}) +if (${FAASM_PERF_PROFILING}) # In accordance with bin/build_llvm_perf.sh and LLVM version for WAVM set(LLVM_DIR /usr/local/code/llvm-perf/build/lib/cmake/llvm) message(STATUS "Using custom LLVM at ${LLVM_DIR} for profiling") @@ -143,10 +152,10 @@ else () # WAVM add_subdirectory(third-party/WAVM) - if(FAASM_WAMR_SUPPORT) - include (${WAMR_ROOT_DIR}/build-scripts/runtime_lib.cmake) - add_library(libwamr ${WAMR_RUNTIME_LIB_SOURCE}) - endif() + if (FAASM_WAMR_SUPPORT) + include(${WAMR_ROOT_DIR}/build-scripts/runtime_lib.cmake) + add_library(libwamr ${WAMR_RUNTIME_LIB_SOURCE}) + endif () # Faasm functions add_subdirectory(func) diff --git a/docs/debugging.md b/docs/debugging.md index 6255db0a3..06f063d05 100644 --- a/docs/debugging.md +++ b/docs/debugging.md @@ -5,8 +5,10 @@ As Faasm functions are compiled to WebAssembly and executed using [WAVM](https://github.com/WAVM/WAVM/), any general tips that apply in WAVM also apply to Faasm. -The instructions below assume that you're building Faasm locally as per the [local dev](local_dev.md) -instructions and that Faasm's built executables are on your `PATH`. +The instructions below assume that +- you're building Faasm locally as per the [local dev](local_dev.md) instructions, +- that you built the `func_sym` target, +- that Faasm's built executables are on your `PATH`. 
### Symbols @@ -53,8 +55,8 @@ To set things up you need to do the following: - Run the `perf.yml` Ansible playbook to set up `perf` - Create a build of LLVM with perf support by running `./bin/build/llvm_perf` (this takes ages) -- Modify the top-level `CMakeLists.txt` to set `FAASM_CUSTOM_LLVM` to `1` -- Rebuild the target you want to profile +- Turn on the `FAASM_PERF_PROFILING` option in your CMake build configuration (`ccmake ` makes this easy). +- Rebuild `codegen_func` and your runner, then use them to build and run the target you want to profile. Once this is done you can use perf as described [here](https://lwn.net/Articles/633846/), i.e.: diff --git a/faasmcli/faasmcli/tasks/libs.py b/faasmcli/faasmcli/tasks/libs.py index 6a7106ab1..e25d65d5c 100644 --- a/faasmcli/faasmcli/tasks/libs.py +++ b/faasmcli/faasmcli/tasks/libs.py @@ -70,10 +70,11 @@ def _build_faasm_lib(dir_name, clean, verbose): clean_dir(build_dir, clean) - verbose_str = "VERBOSE=1" if verbose else "" + verbose_str = "-s" if verbose else "" build_cmd = [ verbose_str, "cmake", + "-G Ninja" "-DFAASM_BUILD_TYPE=wasm", "-DCMAKE_BUILD_TYPE=Release", "-DCMAKE_TOOLCHAIN_FILE={}".format(FAASM_TOOLCHAIN_FILE), @@ -87,15 +88,10 @@ def _build_faasm_lib(dir_name, clean, verbose): if res != 0: exit(1) - res = call("{} make".format(verbose_str), shell=True, cwd=build_dir) + res = call("ninja {} install".format(verbose_str), shell=True, cwd=build_dir) if res != 0: exit(1) - res = call("make install", shell=True, cwd=build_dir) - if res != 0: - exit(1) - - @task def faasm(ctx, clean=False, lib=None, verbose=False): """ @@ -134,6 +130,30 @@ def rust(ctx, clean=False, verbose=False): """ _build_faasm_lib("rust", clean, verbose) +@task +def malloc(ctx, clean=False): + """ + Compile and install dlmalloc + """ + work_dir = join(PROJ_ROOT, "third-party", "malloc") + build_dir = join(PROJ_ROOT, "build", "malloc") + + clean_dir(build_dir, clean) + + build_cmd = [ + "cmake", + "-G Ninja", + "-DCMAKE_BUILD_TYPE=Release", + 
"-DCMAKE_TOOLCHAIN_FILE={}".format(FAASM_TOOLCHAIN_FILE), + work_dir, + ] + + build_cmd_str = " ".join(build_cmd) + print(build_cmd_str) + + call(build_cmd_str, shell=True, cwd=build_dir) + call("ninja install", shell=True, cwd=build_dir) + @task def fake(ctx, clean=False): @@ -181,11 +201,11 @@ def fake(ctx, clean=False): @task -def lulesh(ctx, mpi=False, omp=False, clean=True, debug=False, cp=True): +def lulesh(ctx, lulesh_dir, mpi=False, omp=False, clean=True, debug=False, cp=True): """ Compile and install the LULESH code """ - work_dir = join(THIRD_PARTY_DIR, "LULESH") + work_dir = lulesh_dir if omp and mpi: build_dir = "ompi" diff --git a/func/omp/intel_nstreams.cpp b/func/omp/intel_nstreams.cpp index 8a5d9af72..883fffa2d 100644 --- a/func/omp/intel_nstreams.cpp +++ b/func/omp/intel_nstreams.cpp @@ -152,8 +152,8 @@ FAASM_MAIN_FUNC() printf("OpenMP stream triad: A = B + scalar*C\n"); // Faasm: does not support arguments: - nthread_input = 4; - iterations = 1000; + nthread_input = 2; + iterations = 1'000; length = 10000000; offset = 0; /* FAASM, use defaults specified above diff --git a/func/omp/intel_synch_p2p.cpp b/func/omp/intel_synch_p2p.cpp index d612cfdbc..9cfd11f43 100644 --- a/func/omp/intel_synch_p2p.cpp +++ b/func/omp/intel_synch_p2p.cpp @@ -105,8 +105,8 @@ FAASM_MAIN_FUNC() { printf("Parallel Research Kernels version %s\n", PRKVERSION); printf("OpenMP pipeline execution on 2D grid\n"); - nthread_input = 4; - iterations = 100; + nthread_input = 20; + iterations = 1000; m = 10000; n = 1000; diff --git a/func/omp/intel_transpose.cpp b/func/omp/intel_transpose.cpp index fe013b74e..50dafec4e 100644 --- a/func/omp/intel_transpose.cpp +++ b/func/omp/intel_transpose.cpp @@ -101,7 +101,7 @@ FAASM_MAIN_FUNC() { printf("Parallel Research Kernels version %s\n", PRKVERSION); printf("OpenMP Matrix transpose: B = A^T\n"); - nthread_input = 4; + nthread_input = 20; iterations = 300; order = 2000; /* Faasm - Hardcode the default diff --git a/include/util/timing.h 
b/include/util/timing.h index f27ce56fe..b532606ec 100644 --- a/include/util/timing.h +++ b/include/util/timing.h @@ -3,7 +3,7 @@ #include #include -#ifdef FAASM_PROFILE_ON +#ifdef PROFILE_ALL #define PROF_START(name) const util::TimePoint name = util::startTimer(); #define PROF_END(name) util::logEndTimer(#name, name); #else diff --git a/include/wasm/WasmModule.h b/include/wasm/WasmModule.h index 8aa602939..b6bc4e6b5 100644 --- a/include/wasm/WasmModule.h +++ b/include/wasm/WasmModule.h @@ -115,9 +115,7 @@ namespace wasm { virtual void doRestore(std::istream &inStream) = 0; void prepareArgcArgv(const message::Message &msg); - - void prepareOpenMPContext(const message::Message &msg); -}; + }; // ----- Global functions ----- message::Message *getExecutingCall(); diff --git a/include/wavm/PlatformThreadPool.h b/include/wavm/PlatformThreadPool.h new file mode 100644 index 000000000..165fd5362 --- /dev/null +++ b/include/wavm/PlatformThreadPool.h @@ -0,0 +1,51 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +namespace wasm { + + using namespace WAVM; + class WAVMWasmModule; + + namespace openmp { + struct LocalThreadArgs; + } + + class PlatformThreadPool { + public: + PlatformThreadPool(size_t numThreads, WAVMWasmModule *module); + + friend I64 workerEntryFunc(void* _args); + + std::future runThread(openmp::LocalThreadArgs &&threadArgs); + + ~PlatformThreadPool(); + + private: + std::queue, openmp::LocalThreadArgs>> tasks; + std::vector workers; + + std::mutex mutexQueue; + std::condition_variable condition; + bool stop = false; + }; + + struct WorkerArgs { + U32 stackTop; + PlatformThreadPool *pool; + }; + +} + diff --git a/include/wavm/WAVMWasmModule.h b/include/wavm/WAVMWasmModule.h index 71f67a9e1..56a84f390 100644 --- a/include/wavm/WAVMWasmModule.h +++ b/include/wavm/WAVMWasmModule.h @@ -1,11 +1,11 @@ #pragma once -#include - #include #include #include +#include + using 
namespace WAVM; namespace wasm { @@ -17,6 +17,8 @@ namespace wasm { struct WasmThreadSpec; + class PlatformThreadPool; + class WAVMWasmModule : public WasmModule, Runtime::Resolver { public: WAVMWasmModule(); @@ -112,6 +114,10 @@ namespace wasm { int getDataOffsetFromGOT(const std::string &name); + U32 allocateThreadStack(); + + std::unique_ptr &getPool(); + protected: void doSnapshot(std::ostream &outStream) override; @@ -172,6 +178,10 @@ namespace wasm { void syncPythonFunctionFile(const message::Message &msg); void executeRemoteOMP(message::Message &msg); + + void prepareOpenMPContext(const message::Message &msg); + + std::unique_ptr ompPool; }; WAVMWasmModule *getExecutingModule(); @@ -182,5 +192,6 @@ namespace wasm { Runtime::ContextRuntimeData *contextRuntimeData; Runtime::Function *func; IR::UntaggedValue *funcArgs; + U32 stackTop; }; } diff --git a/include/wavm/openmp/Level.h b/include/wavm/openmp/Level.h index 685e45080..23f54bf71 100644 --- a/include/wavm/openmp/Level.h +++ b/include/wavm/openmp/Level.h @@ -1,10 +1,11 @@ #pragma once -#include #include + #include #include #include +#include namespace wasm { namespace openmp { @@ -20,6 +21,7 @@ namespace wasm { // Global variables controlled by level master class Level { public: + // Defaults set to mimic Clang 9.0.1 behaviour const int depth = 0; // Number of nested OpenMP constructs, 0 for serial code const int effectiveDepth = 0; // Number of parallel regions (> 1 thread) above this level int maxActiveLevel = 1; // Max number of effective parallel regions allowed from the top @@ -29,8 +31,6 @@ namespace wasm { // TODO - This implementation limits to one lock for all critical sections at a level. // Mention in report (maybe fix looking at the lck address and doing a lookup on it though?) std::mutex criticalSection; // Mutex used in critical sections. 
- - // Defaults set to mimic Clang 9.0.1 behaviour Level() = default; // Local constructor @@ -54,9 +54,11 @@ namespace wasm { class SingleHostLevel : public Level { public: + SingleHostLevel() = default; - SingleHostLevel(const std::shared_ptr& parent, int numThreads); + SingleHostLevel(const std::shared_ptr &parent, int numThreads); + ReduceTypes reductionMethod() override; ~SingleHostLevel() = default; diff --git a/include/wavm/openmp/openmp.h b/include/wavm/openmp/openmp.h new file mode 100644 index 000000000..158f4c62a --- /dev/null +++ b/include/wavm/openmp/openmp.h @@ -0,0 +1,18 @@ +#pragma once + +#include +#include + +namespace wasm { + + namespace openmp { + struct LocalThreadArgs { + int tid = 0; + std::shared_ptr level = nullptr; + WAVMWasmModule *parentModule; + message::Message *parentCall; + WasmThreadSpec spec; + }; + } + +} diff --git a/src/runner/CMakeLists.txt b/src/runner/CMakeLists.txt index 51297fe47..0a5c36e7a 100644 --- a/src/runner/CMakeLists.txt +++ b/src/runner/CMakeLists.txt @@ -15,3 +15,6 @@ target_link_libraries(simple_runner ${RUNNER_LIBS}) add_executable(func_sym func_sym.cpp) target_link_libraries(func_sym ${RUNNER_LIBS}) + +add_executable(bench_omp bench_omp.cpp) +target_link_libraries(bench_omp ${RUNNER_LIBS}) diff --git a/src/runner/bench_omp.cpp b/src/runner/bench_omp.cpp new file mode 100644 index 000000000..e5ee97014 --- /dev/null +++ b/src/runner/bench_omp.cpp @@ -0,0 +1,88 @@ +#include +#include +#include +#include +#include +#include +#include + +void wasmRun(const std::string &user, const std::string &funcName, int wasmIterations, const std::string &outfile); +void wasmRunIteration(std::ofstream &profOut, message::Message &call, long num_iterations, const std::string &iteration_name, + int num_threads, wasm::WAVMWasmModule module); + +int main(int argc, char *argv[]) { + util::initLogging(); + const std::shared_ptr &logger = util::getLogger(); + + if (argc != 3) { + logger->error("Usage:\nbench_omp "); + return 1; + } + + 
std::string user = "omp"; + std::string funcName = argv[1]; + int wasmIterations = std::stoi(argv[2]); + + std::string outfile = std::string("/usr/local/code/faasm/wasm/omp/") + funcName + "/bench_wasm.csv"; + // Clean up for container (and ensuring we will be writing to the same file + boost::filesystem::remove(outfile); + + logger->info("Benchmarking {} ({}x wasm)", funcName, wasmIterations); + + wasmRun(user, funcName, wasmIterations, outfile); + + logger->info("Finished benchmark - {}", funcName); + return 0; +} + +void wasmRun(const std::string &user, const std::string &funcName, int wasmIterations, const std::string &outfile) { + + const std::shared_ptr &logger = util::getLogger(); + + std::ofstream profOut; + profOut.open(outfile, std::fstream::app); + + logger->info("Running Wasm benchmark"); + if (!boost::filesystem::exists(outfile)) { + throw std::runtime_error("Could not find native benchmark output at " + outfile); + } + + std::vector iterations = {200000l, 20000000l, 200000000l}; + std::vector iter_names = {"Tiny,", "Small,", "Big,"}; + + message::Message call = util::messageFactory(user, funcName); + module_cache::WasmModuleCache &moduleCache = module_cache::getWasmModuleCache(); + wasm::WAVMWasmModule &cachedModule = moduleCache.getCachedModule(call); + + const util::TimePoint wasmTp = util::startTimer(); + + for (int run = 1; run <= wasmIterations; run++) { + wasmRunIteration(profOut, call, run, "2", 20, cachedModule); +// logger->info("WASM - {} ({}/{})", funcName, run, wasmIterations); +// for (size_t i = 0; i < iterations.size(); i++) { +// for (int num_threads = 2; num_threads < 25; num_threads += 2) { +// wasmRunIteration(profOut, call, iterations[i], iter_names[i], num_threads, cachedModule); +// } +// } + } + + const long wasmTime = util::getTimeDiffMillis(wasmTp); + logger->info("Done executing native in {} ms", wasmTime); + + + profOut.flush(); + profOut.close(); +} + +void wasmRunIteration(std::ofstream &profOut, message::Message &call, 
long num_iterations, const std::string &iteration_name, + int num_threads, wasm::WAVMWasmModule cachedModule) { +// auto args = fmt::format("{} {} 0", num_threads, num_iterations); +// call.set_cmdline(args.c_str()); + + const util::TimePoint iterationTp = util::startTimer(); + wasm::WAVMWasmModule module{cachedModule}; + module.execute(call); + const long wasmIterationTime = util::getTimeDiffMillis(iterationTp); + + profOut << num_threads << ",wasm-pool," << wasmIterationTime << std::endl; +} diff --git a/src/wasm/WasmModule.cpp b/src/wasm/WasmModule.cpp index 91a2da452..b7db7ab84 100644 --- a/src/wasm/WasmModule.cpp +++ b/src/wasm/WasmModule.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include #include @@ -194,21 +193,4 @@ namespace wasm { } } - void WasmModule::prepareOpenMPContext(const message::Message &msg) { - std::shared_ptr ompLevel; - - if (msg.has_ompdepth()) { - ompLevel = std::static_pointer_cast( - std::make_shared(msg.ompdepth(), - msg.ompeffdepth(), - msg.ompmal(), - msg.ompnumthreads())); - } else { - ompLevel = std::static_pointer_cast( - std::make_shared()); - } - - openmp::setTLS(msg.ompthreadnum(), ompLevel); - } - } diff --git a/src/wavm/CMakeLists.txt b/src/wavm/CMakeLists.txt index c4cdbdfb2..22410e1c8 100644 --- a/src/wavm/CMakeLists.txt +++ b/src/wavm/CMakeLists.txt @@ -4,6 +4,7 @@ include_directories( ) set(HEADERS + "${FAASM_INCLUDE_DIR}/wavm/PlatformThreadPool.h" "${FAASM_INCLUDE_DIR}/wavm/WAVMWasmModule.h" ) @@ -24,6 +25,7 @@ set(LIB_FILES mpi.cpp network.cpp openmp.cpp + PlatformThreadPool.cpp process.cpp rust.cpp scheduling.cpp diff --git a/src/wavm/PlatformThreadPool.cpp b/src/wavm/PlatformThreadPool.cpp new file mode 100644 index 000000000..2b1d1c34f --- /dev/null +++ b/src/wavm/PlatformThreadPool.cpp @@ -0,0 +1,82 @@ +#include "PlatformThreadPool.h" + +#include +#include +#include + +using namespace util; + +namespace wasm { + using namespace openmp; + +// I64 ompThreadEntryFunc(void *threadArgsPtr); + + I64 
workerEntryFunc(void *_args) { + auto args = reinterpret_cast(_args); + U32 stackTop = args->stackTop; + PlatformThreadPool *pool = args->pool; + delete args; + + for (;;) { + std::promise promise; + LocalThreadArgs threadArgs; + + { + UniqueLock lock(pool->mutexQueue); + pool->condition.wait(lock, [&pool] { return pool->stop || !pool->tasks.empty(); }); + if (pool->stop && pool->tasks.empty()) { + // We're done folks + return 0; + } + auto pair = std::move(pool->tasks.front()); + pool->tasks.pop(); + promise = std::move(pair.first); + threadArgs = std::move(pair.second); + } + + setTLS(threadArgs.tid, threadArgs.level); + setExecutingModule(threadArgs.parentModule); + setExecutingCall(threadArgs.parentCall); + threadArgs.spec.stackTop = stackTop; + promise.set_value(threadArgs.parentModule->executeThreadLocally(threadArgs.spec)); + } + } + + PlatformThreadPool::PlatformThreadPool(size_t numThreads, WAVMWasmModule *module) { + for (size_t i = 0; i < numThreads; ++i) { + // Set up workers arguments including pre-allocating a stack for the threads it will execute + WorkerArgs *workerArgs = new WorkerArgs(); + workerArgs->stackTop = module->allocateThreadStack(); + workerArgs->pool = this; + + // Run worker + workers.emplace_back(Platform::createThread(0, workerEntryFunc, workerArgs)); + } + } + + std::future PlatformThreadPool::runThread(LocalThreadArgs &&threadArgs) { + // Workers pull promises to save futures in them. 
+ std::promise promise; + std::future future = promise.get_future(); + + // Sends works to workers + { + UniqueLock lock(mutexQueue); + tasks.emplace(std::make_pair(std::move(promise), std::move(threadArgs))); + } + + condition.notify_one(); + return future; + } + + PlatformThreadPool::~PlatformThreadPool() { + { + UniqueLock lock(mutexQueue); + stop = true; + } + condition.notify_all(); + for (auto worker : workers) { + Platform::joinThread(worker); + } + } +} diff --git a/src/wavm/WAVMWasmModule.cpp b/src/wavm/WAVMWasmModule.cpp index 75b2a89b5..c5fecfde9 100644 --- a/src/wavm/WAVMWasmModule.cpp +++ b/src/wavm/WAVMWasmModule.cpp @@ -23,8 +23,8 @@ #include #include #include - #include +#include constexpr int THREAD_STACK_SIZE(2 * ONE_MB_BYTES); @@ -736,6 +736,7 @@ namespace wasm { getContextRuntimeData(executionContext), funcInstance, invokeArgs.data(), + getExecutingModule()->allocateThreadStack(), }; // Record the return value @@ -764,6 +765,10 @@ namespace wasm { return wasmPtr; } + U32 WAVMWasmModule::allocateThreadStack() { + return this->mmapMemory(THREAD_STACK_SIZE); + } + U32 WAVMWasmModule::mmapMemory(U32 length) { // Round up to page boundary Uptr pagesRequested = getNumberOfPagesForBytes(length); @@ -1133,7 +1138,7 @@ namespace wasm { I64 WAVMWasmModule::executeThreadLocally(WasmThreadSpec &spec) { const std::shared_ptr &logger = util::getLogger(); // Create a new region for this thread's stack - U32 thisStackBase = getExecutingModule()->mmapMemory(THREAD_STACK_SIZE); + U32 thisStackBase = spec.stackTop; U32 stackTop = thisStackBase + THREAD_STACK_SIZE - 1; // Create a new context for this thread @@ -1265,4 +1270,27 @@ namespace wasm { storage::SharedFiles::syncSharedFile(sharedPath, runtimeFilePath); } + + void WAVMWasmModule::prepareOpenMPContext(const message::Message &msg) { + std::shared_ptr ompLevel; + + if (msg.has_ompdepth()) { + ompLevel = std::static_pointer_cast( + std::make_shared(msg.ompdepth(), + msg.ompeffdepth(), + msg.ompmal(), + 
msg.ompnumthreads())); + } else { + ompPool = std::make_unique(util::getSystemConfig().maxWorkersPerFunction, this); + ompLevel = std::static_pointer_cast( + std::make_shared()); + } + + openmp::setTLS(msg.ompthreadnum(), ompLevel); + } + + std::unique_ptr &WAVMWasmModule::getPool() { + return ompPool; + } + } diff --git a/src/wavm/openmp.cpp b/src/wavm/openmp.cpp index bc3837912..edb4ea964 100644 --- a/src/wavm/openmp.cpp +++ b/src/wavm/openmp.cpp @@ -1,28 +1,22 @@ -#include "WAVMWasmModule.h" - -#include -#include +#include "wavm/openmp/openmp.h" +#include #include #include #include -#include #include #include - #include +#include +#include +#include +#include + namespace wasm { using namespace openmp; - struct LocalThreadArgs { - int tid = 0; - std::shared_ptr level = nullptr; - wasm::WAVMWasmModule *parentModule; - message::Message *parentCall; - WasmThreadSpec spec; - }; /** * Performs actual static assignment */ @@ -334,14 +328,14 @@ namespace wasm { // Note - must ensure thread arguments are outside loop scope otherwise they do // may not exist by the time the thread actually consumes them - std::vector threadArgs; - threadArgs.reserve(nextNumThreads); +// std::vector threadArgs; +// threadArgs.reserve(nextNumThreads); std::vector> microtaskArgs; microtaskArgs.reserve(nextNumThreads); - std::vector platformThreads; - platformThreads.reserve(nextNumThreads); + std::vector> threadsFutures; + threadsFutures.reserve(nextNumThreads); // Build up arguments for (int threadNum = 0; threadNum < nextNumThreads; threadNum++) { @@ -358,32 +352,25 @@ namespace wasm { // Arguments for spawning the thread // NOTE - CLion auto-format insists on this layout... 
and clangd really hates C99 extensions - threadArgs.push_back({ - .tid = threadNum, - .level = nextLevel, - .parentModule = parentModule, - .parentCall = parentCall, - .spec = { - .contextRuntimeData = contextRuntimeData, - .func = func, - .funcArgs = microtaskArgs[threadNum].data(), - } - }); - } - - // Create the threads themselves - for (int threadNum = 0; threadNum < nextNumThreads; threadNum++) { - platformThreads.emplace_back(Platform::createThread( - 0, - ompThreadEntryFunc, - &threadArgs[threadNum] - )); + LocalThreadArgs threadArgs = { + .tid = threadNum, + .level = nextLevel, + .parentModule = parentModule, + .parentCall = parentCall, + .spec = { + .contextRuntimeData = contextRuntimeData, + .func = func, + .funcArgs = microtaskArgs[threadNum].data(), + } + }; + + threadsFutures.emplace_back(parentModule->getPool()->runThread(std::move(threadArgs))); } // Await all threads I64 numErrors = 0; - for (auto t: platformThreads) { - numErrors += Platform::joinThread(t); + for (auto &f : threadsFutures) { + numErrors += f.get(); } if (numErrors) { diff --git a/src/wavm/openmp/Level.cpp b/src/wavm/openmp/Level.cpp index 2397cbd5f..6a1b283d9 100644 --- a/src/wavm/openmp/Level.cpp +++ b/src/wavm/openmp/Level.cpp @@ -1,4 +1,4 @@ -#include +#include "wavm/openmp/Level.h" #include #include diff --git a/src/wavm/threads.cpp b/src/wavm/threads.cpp index 9a5868dd9..797834505 100644 --- a/src/wavm/threads.cpp +++ b/src/wavm/threads.cpp @@ -85,6 +85,7 @@ namespace wasm { spec->contextRuntimeData = contextRuntimeData; spec->func = func; spec->funcArgs = threadArgs; + spec->stackTop = thisModule->allocateThreadStack(); auto pArgs = new PThreadArgs(); pArgs->parentModule = thisModule; diff --git a/third-party/LULESH b/third-party/LULESH deleted file mode 160000 index 0bbe5ef17..000000000 --- a/third-party/LULESH +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 0bbe5ef1748647d1b119f9ff5531889d1a82520c diff --git a/third-party/malloc/CMakeLists.txt 
b/third-party/malloc/CMakeLists.txt index f5939e654..5da6b79da 100644 --- a/third-party/malloc/CMakeLists.txt +++ b/third-party/malloc/CMakeLists.txt @@ -10,8 +10,8 @@ add_library(dlmalloc STATIC dlmalloc.c) set_target_properties(dlmalloc PROPERTIES PUBLIC_HEADER malloc.h) install(TARGETS dlmalloc - ARCHIVE DESTINATION ${CMAKE_SYSROOT}/lib - LIBRARY DESTINATION ${CMAKE_SYSROOT}/lib + ARCHIVE DESTINATION ${CMAKE_SYSROOT}/lib/wasm32-wasi + LIBRARY DESTINATION ${CMAKE_SYSROOT}/lib/wasm32-wasi RUNTIME DESTINATION ${CMAKE_SYSROOT}/bin PUBLIC_HEADER DESTINATION ${CMAKE_SYSROOT}/include/ )