@MASTERSTHESIS{ 2025:723069584, title = {High-level multi-GPU support for multi-core stream parallelism}, year = {2025}, url = "https://tede2.pucrs.br/tede2/handle/tede/11668", abstract = "Nowadays, computer architectures often rely on graphics processing units (GPUs) to allow massive parallelism exploitation at a lower cost. This parallelism can be particularly advantageous in stream processing, a domain of applications continuously processing a data flow of often unknown size. Nonetheless, the programmer must employ parallel programming to exploit underlying GPU hardware capabilities efficiently. This can be challenging since it involves refactoring algorithms, using parallelism techniques, and knowing about the environment?s hardware, especially when writing portable code, since GPU vendors and generations offer different capabilities. This challenge becomes even more complex in multi-GPU environments; the programmer must choose which strategy to partition their data, which strategy to schedule their tasks onto the GPUs, how to handle communication needs between tasks, and how to perform GPU asynchronous operations. To address these challenges, researchers have focused on investigating efficient programming techniques for GPUs and developing abstractions that simplify the programming process. One such abstraction is SPar, a domain-specific language (DSL) that enables the expression of stream parallelism without sacrificing performance. Recently, an extension was added to SPar that allows parallel code generation for GPUs in streaming applications. To achieve this, SPar performs source-to-source code transformations and generates GPU code using an intermediate library named GSParLib. Nonetheless, SPar supports code generation for a single GPU environment only. In this work, we investigate how to allow multi-GPU code generation for stream processing and investigate state-of-the-art optimizations and techniques for multi-GPU programming targeting multi-core systems. Our contributions are a set of data stream scheduling algorithms for multi-GPUs, which were integrated in the code generation of SPar, transparently supporting multi-GPU usage in multi-core systems. The experimental results demonstrated that it is possible to simplify the exploitation of multi-GPU for stream applications without sacrificing performance by utilizing scheduling policies specifically targeting multi-GPU through code annotations like the ones provided by SPar, achieving similar results to manual implementations targeting multi-GPU while having close to half the number of lines of code.", publisher = {Pontif?cia Universidade Cat?lica do Rio Grande do Sul}, scholl = {Programa de P?s-Gradua??o em Ci?ncia da Computa??o}, note = {Escola Polit?cnica} }