<?xml version="1.0" encoding="UTF-8"?><?xml-stylesheet type="text/xsl" href="static/style.xsl"?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2026-05-05T16:04:50Z</responseDate><request verb="GetRecord" identifier="oai:riubu.ubu.es:10259/5766" metadataPrefix="dim">https://riubu.ubu.es/oai/request</request><GetRecord><record><header><identifier>oai:riubu.ubu.es:10259/5766</identifier><datestamp>2022-11-21T12:30:19Z</datestamp><setSpec>com_10259_4219</setSpec><setSpec>com_10259_5086</setSpec><setSpec>com_10259_2604</setSpec><setSpec>col_10259_4220</setSpec></header><metadata><dim:dim xmlns:dim="http://www.dspace.org/xmlns/dspace/dim" xmlns:doc="http://www.lyncode.com/xoai" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.dspace.org/xmlns/dspace/dim http://www.dspace.org/schema/dim.xsd">
<dim:field mdschema="dc" element="contributor" qualifier="author" authority="747" confidence="500" orcid_id="">Juez Gil, Mario</dim:field>
<dim:field mdschema="dc" element="contributor" qualifier="author" authority="39" confidence="500" orcid_id="0000-0001-6965-0237">Arnaiz González, Álvar</dim:field>
<dim:field mdschema="dc" element="contributor" qualifier="author" authority="477" confidence="500" orcid_id="">Rodríguez Diez, Juan José</dim:field>
<dim:field mdschema="dc" element="contributor" qualifier="author" authority="212" confidence="500" orcid_id="0000-0002-1206-1084">García Osorio, César</dim:field>
<dim:field mdschema="dc" element="date" qualifier="accessioned">2021-05-14T11:34:01Z</dim:field>
<dim:field mdschema="dc" element="date" qualifier="available">2021-05-14T11:34:01Z</dim:field>
<dim:field mdschema="dc" element="date" qualifier="issued">2021-09</dim:field>
<dim:field mdschema="dc" element="identifier" qualifier="issn">1568-4946</dim:field>
<dim:field mdschema="dc" element="identifier" qualifier="uri">http://hdl.handle.net/10259/5766</dim:field>
<dim:field mdschema="dc" element="identifier" qualifier="doi">10.1016/j.asoc.2021.107447</dim:field>
<dim:field mdschema="dc" element="description" qualifier="abstract" lang="en">Datasets are growing in size and complexity at a pace never seen before, forming ever larger datasets known as Big Data. A common problem for classification, especially in Big Data, is that the numerous examples of the different classes might not be balanced. Some decades ago, imbalanced classification was therefore introduced, to correct the tendency of classifiers that show bias in favor of the majority class and that ignore the minority one. To date, although the number of imbalanced classification methods has increased, they continue to focus on normal-sized datasets and not on the new reality of Big Data. In this paper, in-depth experimentation with ensemble classifiers is conducted in the context of imbalanced Big Data classification, using two popular ensemble families (Bagging and Boosting) and different resampling methods. All the experimentation was launched in Spark clusters, comparing ensemble performance and execution times with statistical test results, including the newest ones based on the Bayesian approach. One very interesting conclusion from the study was that simpler methods applied to unbalanced datasets in the context of Big Data provided better results than complex methods. The additional complexity of some of the sophisticated methods, which appears necessary to process and to reduce imbalance in normal-sized datasets, was not effective for imbalanced Big Data.</dim:field>
<dim:field mdschema="dc" element="description" qualifier="sponsorship" lang="es">This work was supported by the “la Caixa” Foundation, Spain, under agreement LCF/PR/PR18/51130007; by the Junta de Castilla y León, Spain, under project BU055P20 (JCyL/FEDER, UE), co-financed through European Union FEDER funds; and by the Consejería de Educación of the Junta de Castilla y León and the European Social Fund, Spain, through a pre-doctoral grant (EDU/1100/2017).</dim:field>
<dim:field mdschema="dc" element="format" qualifier="mimetype">application/pdf</dim:field>
<dim:field mdschema="dc" element="language" qualifier="iso" lang="es">eng</dim:field>
<dim:field mdschema="dc" element="publisher" lang="es">Elsevier</dim:field>
<dim:field mdschema="dc" element="relation" qualifier="ispartof" lang="es">Applied Soft Computing. 2021, V. 108, 107447</dim:field>
<dim:field mdschema="dc" element="relation" qualifier="publisherversion" lang="es">https://doi.org/10.1016/j.asoc.2021.107447</dim:field>
<dim:field mdschema="dc" element="relation" qualifier="projectID" lang="es">info:eu-repo/grantAgreement/Fundación Bancaria Caixa d'Estalvis i Pensions de Barcelona//LCF%2FPR%2FPR18%2F51130007</dim:field>
<dim:field mdschema="dc" element="relation" qualifier="projectID" lang="es">info:eu-repo/grantAgreement/Junta de Castilla y León//BU055P20//Métodos y Aplicaciones Industriales del Aprendizaje Semisupervisado</dim:field>
<dim:field mdschema="dc" element="rights" lang="*">Attribution-NonCommercial-NoDerivatives 4.0 International</dim:field>
<dim:field mdschema="dc" element="rights" qualifier="uri" lang="*">http://creativecommons.org/licenses/by-nc-nd/4.0/</dim:field>
<dim:field mdschema="dc" element="rights" qualifier="accessRights" lang="es">info:eu-repo/semantics/openAccess</dim:field>
<dim:field mdschema="dc" element="subject" lang="en">Unbalance</dim:field>
<dim:field mdschema="dc" element="subject" lang="en">Imbalance</dim:field>
<dim:field mdschema="dc" element="subject" lang="en">Ensemble</dim:field>
<dim:field mdschema="dc" element="subject" lang="en">Resampling</dim:field>
<dim:field mdschema="dc" element="subject" lang="en">Big Data</dim:field>
<dim:field mdschema="dc" element="subject" lang="en">Spark</dim:field>
<dim:field mdschema="dc" element="subject" qualifier="other" lang="es">Informática</dim:field>
<dim:field mdschema="dc" element="subject" qualifier="other" lang="en">Computer science</dim:field>
<dim:field mdschema="dc" element="title" lang="en">Experimental evaluation of ensemble classifiers for imbalance in Big Data</dim:field>
<dim:field mdschema="dc" element="type" lang="es">info:eu-repo/semantics/article</dim:field>
<dim:field mdschema="dc" element="type" qualifier="hasVersion" lang="es">info:eu-repo/semantics/publishedVersion</dim:field>
<dim:field mdschema="dc" element="journal" qualifier="title" lang="es">Applied Soft Computing</dim:field>
<dim:field mdschema="dc" element="volume" qualifier="number" lang="es">108</dim:field>
<dim:field mdschema="dc" element="page" qualifier="initial" lang="es">107447</dim:field>
</dim:dim></metadata></record></GetRecord></OAI-PMH>