% Master's thesis record exported from the PUCRS TEDE repository (see url).
% NOTE(review): the required `author` field is missing from the export and
% cannot be recovered from this record -- add it from the repository page.
% The citation key is kept as exported so existing \cite commands still resolve.
@mastersthesis{2022:1794170664,
  title     = {Exploring the radiomics approach for {COVID-19} identification in lung computed tomography},
  school    = {Pontifícia Universidade Católica do Rio Grande do Sul},
  publisher = {Pontifícia Universidade Católica do Rio Grande do Sul},
  year      = {2022},
  url       = {https://tede2.pucrs.br/tede2/handle/tede/10223},
  note      = {Programa de Pós-Graduação em Gerontologia Biomédica, Escola de Medicina},
  abstract  = {The COVID-19 pneumonia outbreak has caused global turmoil and was declared a pandemic by the World Health Organization on March 13, 2020. Chest radiological examinations, such as chest X-rays or CT scans, play a vital role in the diagnosis of COVID-19. Several studies have proposed the use of classification models using radiomic features extracted from the lungs in radiological images, mainly for COVID-19 diagnosis and severity assessment. However, few of these studies explore how feature extraction parameters, such as discretization, impact the extracted features. Therefore, this study aims to implement models for identifying COVID-19 through the radiomic signature while investigating different preprocessing and discretization parameters. Our dataset was composed by 180 (128 COVID and 52 non-COVID) chest CT scans performed at Hospital São Lucas da PUCRS which were divided into training (50\%), validation (25\%), and test (25\%) sets. We performed lung segmentation, applied several filters, and discretized the image with 6 different bin sizes: 1, 5, 10, 25, 50, and 75. Features were extracted from all applied filters and bin sizes. Wavelet and non-wavelet features were merged into 36 combinations of bin sizes with 1774 features for each lung. A classification model was trained with each combination of features and the best three models were chosen for the optimization. We identified some of our limitations and used four alternative strategies to try to overcome them: SMOTE, undersampling, feature selection, and only using features from the original image. The best performance was achieved by SMOTE NW25-1 model with an AUC of 0.800. The best three models for each of these alternative strategies were also optimized.
Of the 15 optimized models, the six best were selected for feature importance analysis. The laplacian of gaussian and wavelet filters were the ones that generated the most relevant features. Our results indicate that smaller bin sizes, in a range from 1 to 25 may be further investigated for feature extraction in the original image and most filters. Laplacian of gaussian and wavelet filters may perform better with even smaller bin sizes, with a range from 1 to 10.},
}