<?xml version="1.0" encoding="UTF-8"?><?xml-stylesheet type="text/xsl" href="static/style.xsl"?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2026-04-17T11:31:16Z</responseDate><request verb="GetRecord" identifier="oai:riubu.ubu.es:10259/4814" metadataPrefix="dim">https://riubu.ubu.es/oai/request</request><GetRecord><record><header><identifier>oai:riubu.ubu.es:10259/4814</identifier><datestamp>2022-04-29T12:02:45Z</datestamp><setSpec>com_10259_4219</setSpec><setSpec>com_10259_5086</setSpec><setSpec>com_10259_2604</setSpec><setSpec>col_10259_4220</setSpec></header><metadata><dim:dim xmlns:dim="http://www.dspace.org/xmlns/dspace/dim" xmlns:doc="http://www.lyncode.com/xoai" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.dspace.org/xmlns/dspace/dim http://www.dspace.org/schema/dim.xsd">
<dim:field mdschema="dc" element="contributor" qualifier="author" authority="37a0866d-eced-4e47-913c-cdc9943f0a48" confidence="500" orcid_id="">Kuncheva, Ludmila I.</dim:field>
<dim:field mdschema="dc" element="contributor" qualifier="author" authority="477" confidence="500" orcid_id="">Rodríguez Diez, Juan José</dim:field>
<dim:field mdschema="dc" element="date" qualifier="accessioned">2018-06-18T11:52:23Z</dim:field>
<dim:field mdschema="dc" element="date" qualifier="issued">2018-09</dim:field>
<dim:field mdschema="dc" element="date" qualifier="embargo">2020-09</dim:field>
<dim:field mdschema="dc" element="identifier" qualifier="issn">0031-3203</dim:field>
<dim:field mdschema="dc" element="identifier" qualifier="uri">http://hdl.handle.net/10259/4814</dim:field>
<dim:field mdschema="dc" element="identifier" qualifier="doi">10.1016/j.patcog.2018.03.012</dim:field>
<dim:field mdschema="dc" element="description" qualifier="abstract" lang="en">High-dimensional data with very few instances are typical in many application domains. Selecting a highly discriminative subset of the original features is often the main interest of the end user. The widely-used feature selection protocol for such type of data consists of two steps. First, features are selected from the data (possibly through cross-validation), and, second, a cross-validation protocol is applied to test a classifier using the selected features. The selected feature set and the testing accuracy are then returned to the user. For the lack of a better option, the same low-sample-size dataset is used in both steps. Questioning the validity of this protocol, we carried out an experiment using 24 high-dimensional datasets, three feature selection methods and five classifier models. We found that the accuracy returned by the above protocol is heavily biased, and therefore propose an alternative protocol which avoids the contamination by including both steps in a single cross-validation loop. Statistical tests verify that the classification accuracy returned by the proper protocol is significantly closer to the true accuracy (estimated from an independent testing set) compared to that returned by the currently favoured protocol.</dim:field>
<dim:field mdschema="dc" element="description" qualifier="sponsorship" lang="en">project RPG-2015-188 funded by The Leverhulme Trust, UK and by project TIN2015-67534-P (MINECO/FEDER, UE) funded by the Ministerio de Economía y Competitividad of the Spanish Government and European Union FEDER funds</dim:field>
<dim:field mdschema="dc" element="format" qualifier="mimetype">application/pdf</dim:field>
<dim:field mdschema="dc" element="language" qualifier="iso" lang="es">eng</dim:field>
<dim:field mdschema="dc" element="publisher" lang="en">Elsevier</dim:field>
<dim:field mdschema="dc" element="relation" qualifier="ispartof" lang="en">Pattern Recognition. 2018, V. 81, p. 660-673</dim:field>
<dim:field mdschema="dc" element="relation" qualifier="publisherversion">https://doi.org/10.1016/j.patcog.2018.03.012</dim:field>
<dim:field mdschema="dc" element="relation" qualifier="projectID">info:eu-repo/grantAgreement/MINECO/TIN2015-67534-P</dim:field>
<dim:field mdschema="dc" element="relation" qualifier="projectID">info:eu-repo/grantAgreement/The Leverhulme Trust/RPG-2015-188</dim:field>
<dim:field mdschema="dc" element="rights">Attribution-NonCommercial-NoDerivatives 4.0 International</dim:field>
<dim:field mdschema="dc" element="rights" qualifier="uri">http://creativecommons.org/licenses/by-nc-nd/4.0/</dim:field>
<dim:field mdschema="dc" element="rights" qualifier="accessRights">info:eu-repo/semantics/openAccess</dim:field>
<dim:field mdschema="dc" element="subject" lang="en">Feature selection</dim:field>
<dim:field mdschema="dc" element="subject" lang="en">Wide datasets</dim:field>
<dim:field mdschema="dc" element="subject" lang="en">Experimental protocol</dim:field>
<dim:field mdschema="dc" element="subject" lang="en">Training/testing</dim:field>
<dim:field mdschema="dc" element="subject" lang="en">Cross-validation</dim:field>
<dim:field mdschema="dc" element="subject" qualifier="other" lang="en">Computer science</dim:field>
<dim:field mdschema="dc" element="subject" qualifier="other" lang="es">Informática</dim:field>
<dim:field mdschema="dc" element="title" lang="en">On feature selection protocols for very low-sample-size data</dim:field>
<dim:field mdschema="dc" element="type">info:eu-repo/semantics/article</dim:field>
<dim:field mdschema="dc" element="type" qualifier="hasVersion" lang="en">info:eu-repo/semantics/acceptedVersion</dim:field>
</dim:dim></metadata></record></GetRecord></OAI-PMH>