% PhD thesis record exported from the PUCRS TEDE repository.
% NOTE(review): the required author field is missing from this export; add it
% from the repository page (see the url field) before citing this entry.
% The university name is kept in publisher as exported and mirrored in school,
% which is the field thesis styles actually read.
@phdthesis{2025:1324891279,
  title     = {Holistic patient representation learning and automatic annotation of electronic health records},
  school    = {Pontif{\'\i}cia Universidade Cat{\'o}lica do Rio Grande do Sul},
  publisher = {Pontif{\'\i}cia Universidade Cat{\'o}lica do Rio Grande do Sul},
  year      = {2025},
  url       = {https://tede2.pucrs.br/tede2/handle/tede/11735},
  note      = {Escola Polit{\'e}cnica, Programa de P{\'o}s-Gradua{\c{c}}{\~a}o em Ci{\^e}ncia da Computa{\c{c}}{\~a}o},
  abstract  = {Patient representation learning is the use of artificial intelligence technologies to reinterpret known patient data, extracted from Electronic Health Records, in a way that allows machine learning models to predict data and outcomes that could help medical professionals in diagnosis and the administration of proper care. It is important to note that medical data is tied to its place of origin. To deal with such a vital aspect to the development of national computational medicine solutions, we developed BRATECA, a collection of Brazilian tertiary care hospital data. This collection is open for credentialed access and was the largest collection of Brazilian medical data at the time of its release. Utilizing this collection in patient flow tasks, we achieved results of up to 0.88 F1 in patient Admission Prediction and up to 0.84 F1 for patient Extended Stay Prediction. We also developed an architecture for automatic annotation of social determinants of health in Electronic Health Records, which was validated on the US intensive care data collection MIMIC-III, where we achieved correlations of more than 0.8 measured in Cohen's kappa for all annotation categories between our automatic annotation and human annotations.},
}