<?xml version="1.0" encoding="UTF-8"?><?xml-stylesheet type="text/xsl" href="static/style.xsl"?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2026-04-18T07:59:54Z</responseDate><request verb="GetRecord" identifier="oai:riubu.ubu.es:10259/6206" metadataPrefix="xoai">https://riubu.ubu.es/oai/request</request><GetRecord><record><header><identifier>oai:riubu.ubu.es:10259/6206</identifier><datestamp>2022-11-21T12:51:46Z</datestamp><setSpec>com_10259_5377</setSpec><setSpec>com_10259_5086</setSpec><setSpec>com_10259_2604</setSpec><setSpec>col_10259_5378</setSpec></header><metadata><metadata xmlns="http://www.lyncode.com/xoai" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.lyncode.com/xoai http://www.lyncode.com/xsd/xoai.xsd">
<element name="dc">
<element name="contributor">
<element name="author">
<element name="none">
<field name="value">Juez Gil, Mario</field>
<field name="authority">747</field>
<field name="confidence">500</field>
<field name="orcid_id"/>
<field name="value">Arnaiz González, Álvar</field>
<field name="authority">39</field>
<field name="confidence">600</field>
<field name="orcid_id">0000-0001-6965-0237</field>
<field name="value">Rodríguez Diez, Juan José</field>
<field name="authority">477</field>
<field name="confidence">600</field>
<field name="orcid_id"/>
<field name="value">López Nozal, Carlos</field>
<field name="authority">322</field>
<field name="confidence">600</field>
<field name="orcid_id">0000-0001-8462-212X</field>
<field name="value">García Osorio, César</field>
<field name="authority">212</field>
<field name="confidence">600</field>
<field name="orcid_id">0000-0002-1206-1084</field>
</element>
</element>
</element>
<element name="date">
<element name="accessioned">
<element name="none">
<field name="value">2021-11-23T08:25:06Z</field>
</element>
</element>
<element name="available">
<element name="none">
<field name="value">2021-11-23T08:25:06Z</field>
</element>
</element>
<element name="issued">
<element name="none">
<field name="value">2021-11</field>
</element>
</element>
<element name="embargoEndDate">
<element name="none"/>
</element>
</element>
<element name="identifier">
<element name="issn">
<element name="none">
<field name="value">0925-2312</field>
</element>
</element>
<element name="uri">
<element name="none">
<field name="value">http://hdl.handle.net/10259/6206</field>
</element>
</element>
<element name="doi">
<element name="none">
<field name="value">10.1016/j.neucom.2021.08.086</field>
</element>
</element>
</element>
<element name="description">
<element name="abstract">
<element name="es">
<field name="value">One of the main goals of Big Data research, is to find new data mining methods that are able to process large amounts of data in acceptable times. In Big Data classification, as in traditional classification, class imbalance is a common problem that must be addressed, in the case of Big Data also looking for a solution that can be applied in an acceptable execution time. In this paper we present Approx-SMOTE, a parallel implementation of the SMOTE algorithm for the Apache Spark framework. The key difference with the original SMOTE, besides parallelism, is that it uses an approximated version of k-Nearest Neighbor which makes it highly scalable. Although an implementation of SMOTE for Big Data already exists (SMOTE-BD), it uses an exact Nearest Neighbor search, which does not make it entirely scalable. Approx-SMOTE on the other hand is able to achieve up to 30 times faster run times without sacrificing the improved classification performance offered by the original SMOTE.</field>
</element>
</element>
<element name="sponsorship">
<element name="es">
<field name="value">“La Caixa” Foundation, under agreement LCF/PR/PR18/51130007. This work was supported by the Junta de Castilla y León under project BU055P20 and by the Ministry of Science and Innovation of Spain under project PID2020-119894 GB-I00, co-financed through European Union FEDER funds. It also was supported through Consejería de Educación of the Junta de Castilla y León and the European Social Fund through a pre-doctoral grant (EDU/1100/2017). This material is based upon work supported by Google Cloud.</field>
</element>
</element>
</element>
<element name="format">
<element name="mimetype">
<element name="none">
<field name="value">application/pdf</field>
</element>
</element>
</element>
<element name="language">
<element name="iso">
<element name="es">
<field name="value">eng</field>
</element>
</element>
</element>
<element name="publisher">
<element name="es">
<field name="value">Elsevier</field>
</element>
</element>
<element name="relation">
<element name="ispartof">
<element name="es">
<field name="value">Neurocomputing. 2021, V. 464, p. 432-437</field>
</element>
</element>
<element name="publisherversion">
<element name="es">
<field name="value">https://doi.org/10.1016/j.neucom.2021.08.086</field>
</element>
</element>
<element name="projectID">
<element name="es">
<field name="value">info:eu-repo/grantAgreement/Fundación Bancaria Caixa d'Estalvis i Pensions de Barcelona//LCF%2FPR%2FPR18%2F51130007</field>
<field name="value">info:eu-repo/grantAgreement/Junta de Castilla y León//BU055P20//Métodos y Aplicaciones Industriales del Aprendizaje Semisupervisado</field>
<field name="value">info:eu-repo/grantAgreement/AEI/Plan Estatal de Investigación Científica y Técnica y de Innovación 2017-2020/PID2020-119894GB-I00/ES/APRENDIZAJE AUTOMATICO CON DATOS ESCASAMENTE ETIQUETADOS PARA LA INDUSTRIA 4.0</field>
</element>
</element>
</element>
<element name="rights">
<element name="*">
<field name="value">Attribution-NonCommercial-NoDerivatives 4.0 Internacional</field>
</element>
<element name="uri">
<element name="*">
<field name="value">http://creativecommons.org/licenses/by-nc-nd/4.0/</field>
</element>
</element>
<element name="accessRights">
<element name="es">
<field name="value">info:eu-repo/semantics/openAccess</field>
</element>
</element>
</element>
<element name="subject">
<element name="es">
<field name="value">SMOTE</field>
<field name="value">Imbalance</field>
<field name="value">Spark</field>
<field name="value">Big data</field>
<field name="value">Data mining</field>
</element>
<element name="other">
<element name="es">
<field name="value">Informática</field>
<field name="value">Computer science</field>
</element>
</element>
</element>
<element name="title">
<element name="es">
<field name="value">Approx-SMOTE: Fast SMOTE for Big Data on Apache Spark</field>
</element>
</element>
<element name="type">
<element name="es">
<field name="value">info:eu-repo/semantics/article</field>
</element>
<element name="hasVersion">
<element name="es">
<field name="value">info:eu-repo/semantics/publishedVersion</field>
</element>
</element>
</element>
</element>
<element name="bundles">
<element name="bundle">
<field name="name">THUMBNAIL</field>
<element name="bitstreams">
<element name="bitstream">
<field name="name">Juez-neurocomputing_2021.pdf.jpg</field>
<field name="originalName">Juez-neurocomputing_2021.pdf.jpg</field>
<field name="description">IM Thumbnail</field>
<field name="format">image/jpeg</field>
<field name="size">4933</field>
<field name="url">https://riubu.ubu.es/bitstream/10259/6206/4/Juez-neurocomputing_2021.pdf.jpg</field>
<field name="checksum">5ebbae51252e1549848c2ecdf473e2c9</field>
<field name="checksumAlgorithm">MD5</field>
<field name="sid">4</field>
</element>
</element>
</element>
<element name="bundle">
<field name="name">LICENSE</field>
<element name="bitstreams">
<element name="bitstream">
<field name="name">license.txt</field>
<field name="originalName">license.txt</field>
<field name="format">text/plain; charset=utf-8</field>
<field name="size">999</field>
<field name="url">https://riubu.ubu.es/bitstream/10259/6206/3/license.txt</field>
<field name="checksum">b295bcbce42e2caabeb0c623d3860c06</field>
<field name="checksumAlgorithm">MD5</field>
<field name="sid">3</field>
</element>
</element>
</element>
<element name="bundle">
<field name="name">CC-LICENSE</field>
<element name="bitstreams">
<element name="bitstream">
<field name="name">license_rdf</field>
<field name="originalName">license_rdf</field>
<field name="format">application/rdf+xml; charset=utf-8</field>
<field name="size">805</field>
<field name="url">https://riubu.ubu.es/bitstream/10259/6206/2/license_rdf</field>
<field name="checksum">4460e5956bc1d1639be9ae6146a50347</field>
<field name="checksumAlgorithm">MD5</field>
<field name="sid">2</field>
</element>
</element>
</element>
<element name="bundle">
<field name="name">ORIGINAL</field>
<element name="bitstreams">
<element name="bitstream">
<field name="name">Juez-neurocomputing_2021.pdf</field>
<field name="originalName">Juez-neurocomputing_2021.pdf</field>
<field name="description"/>
<field name="format">application/pdf</field>
<field name="size">1068661</field>
<field name="url">https://riubu.ubu.es/bitstream/10259/6206/1/Juez-neurocomputing_2021.pdf</field>
<field name="checksum">3d2361e59c80bd769742340a4b593ead</field>
<field name="checksumAlgorithm">MD5</field>
<field name="sid">1</field>
</element>
</element>
</element>
</element>
<element name="others">
<field name="handle">10259/6206</field>
<field name="identifier">oai:riubu.ubu.es:10259/6206</field>
<field name="lastModifyDate">2022-11-21 13:51:46.402</field>
</element>
<element name="repository">
<field name="name">Repositorio Institucional de la Universidad de Burgos</field>
<field name="mail">bubrep@ubu.es</field>
</element>
<element name="license">
<field name="bin">RWwgYXV0b3IgY29tbyDDum5pY28gdGl0dWxhciBkZSBsb3MgZGVyZWNob3MgZGUgcHJvcGllZGFkIGludGVsZWN0dWFsIGRlIGxhIG9icmEsIG8gZGlzcG9uaWVuZG8gZGUgbG9zIGRlYmlkb3MgcGVybWlzb3MgZGUgbG9zIG90cm9zIHRpdHVsYXJlcywgc2kgbG9zIGh1YmllcmEsIHkgZW4gdmlydHVkIGRlIGxvcyBkZXJlY2hvcyBxdWUgbGUgY29uZmllcmUgbGEgbGVnaXNsYWNpw7NuIHZpZ2VudGUgc29icmUgcHJvcGllZGFkIGludGVsZWN0dWFsIHkgZGVyZWNob3MgZGUgYXV0b3IsIA0KQVVUT1JJWkEgYSBsYSBVbml2ZXJzaWRhZCBkZSBCdXJnb3MgYSBkaWZ1bmRpciwgZGUgbWFuZXJhIGdyYXR1aXRhLCBlbCBjb250ZW5pZG8gZGUgbG9zIGFyY2hpdm9zIGRpZ2l0YWxlcyBxdWUgY29ycmVzcG9uZGVuIGFsIGRvY3VtZW50byBkZXNjcml0byBhbnRlcmlvcm1lbnRlLCBjb24gY2Fyw6FjdGVyIG5vIGV4Y2x1c2l2byB5IGRlIG1hbmVyYSBww7pibGljYSBlbiBhY2Nlc28gYWJpZXJ0byBhIHRyYXbDqXMgZGUgSW50ZXJuZXQsIHBhcmEgbG8gcXVlIGxhIEJpYmxpb3RlY2EgcHJvY2VkZXLDoSBhIGFyY2hpdmFybG9zIGVuIGVsIFJlcG9zaXRvcmlvIEluc3RpdHVjaW9uYWwuIEFzaW1pc21vIGF1dG9yaXphIGEgbGEgVW5pdmVyc2lkYWQgZGUgQnVyZ29zIGEgcmVhbGl6YXIgbGFzIHRyYW5zZm9ybWFjaW9uZXMgbmVjZXNhcmlhcyBkZSBmb3JtYXRvLCBubyBkZSBjb250ZW5pZG8sIHBhcmEgZ2FyYW50aXphciBsYSBwcmVzZXJ2YWNpw7NuIHkgZWwgYWNjZXNvIGVuIGVsIGZ1dHVyby4NCg0KRWwgYXV0b3IgZGlzcG9uZSwgZW4gdG9kbyBjYXNvLCBkZWwgZGVyZWNobyBhIHJldm9jYXIgZXN0YSBhdXRvcml6YWNpw7NuLg0KDQpMYSBjZXNpw7NuIGRlIGRlcmVjaG9zIGRlIGVzdGEgb2JyYSBzZSBlbmN1ZW50cmEgc3VqZXRhIGEgbGEgbGVnaXNsYWNpw7NuIHZpZ2VudGUgc29icmUgcHJvcGllZGFkIGludGVsZWN0dWFsIHkgZGVyZWNob3MgZGUgYXV0b3Iu</field>
</element>
</metadata></metadata></record></GetRecord></OAI-PMH>