@MASTERSTHESIS{2019:1274587476,
  title     = {Fully-disentangled text-to-image synthesis},
  year      = {2019},
  url       = {http://tede2.pucrs.br/tede2/handle/tede/9031},
  abstract  = {Generalization algorithms suffer from several problems. Commonly, deep learning algorithms are prone to overfit a limited amount of data, generating unsatisfactory results for unseen data. In the case of deep generative models, this problem manifests itself differently: the model tends to ignore nuances of the real distribution and to collapse onto its most common attributes. To some degree this is a desired behavior, but it can lead the algorithm to generate images with very reduced diversity, failing to explore all attributes present in the real distribution. Deep generative networks (e.g. GANs) have no explicit objective to encourage diversity during training. Furthermore, GANs were initially designed to generate random realistic samples with no control for the user. We propose a disentanglement-based method to tackle both problems at once for text-to-image synthesis frameworks. First, we force the disentanglement of concepts not described by the textual descriptions (e.g. background). Then, we use the learned disentangled representations to provide random combinations to the generator. This guides the generator toward a more complete mapping, hence increasing diversity using the same amount of data. With disentangled representations, the framework also gains per-concept control over the synthesis. We conduct several experiments and ablation studies to validate our contributions on a single-object dataset. Results show improvements on both goals and no side effects for traditional text-to-image frameworks. Our approach can easily be used on top of other frameworks to increase control, diversity, and realism.},
  publisher = {Pontifícia Universidade Católica do Rio Grande do Sul},
  school    = {Programa de Pós-Graduação em Ciência da Computação},
  note      = {Escola Politécnica}
}