% DescribeML tool paper, MODELS '22 companion proceedings (ACM).
% NOTE(review): pubstate/tppubtype/urldate look like reference-manager export fields; kept unchanged -- confirm they are still needed.
@inproceedings{Giner-Miguelez:MODELS:2022,
  author     = {Giner-Miguelez, Joan and G{\'o}mez, Abel and Cabot, Jordi},
  title      = {{DescribeML}: A Tool for Describing Machine Learning Datasets},
  booktitle  = {Proceedings of the 25th International Conference on Model Driven Engineering Languages and Systems: Companion Proceedings},
  series     = {MODELS '22},
  pages      = {22--26},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  venue      = {Montreal, Quebec, Canada},
  year       = {2022},
  date       = {2022-11-09},
  urldate    = {2022-01-01},
  doi        = {10.1145/3550356.3559087},
  isbn       = {9781450394673},
  abstract   = {Datasets play a central role in the training and evaluation of machine learning (ML) models. But they are also the root cause of many undesired model behaviors, such as biased predictions. To overcome this situation, the ML community is proposing a data-centric cultural shift, where data issues are given the attention they deserve, for instance, proposing standard descriptions for datasets. In this sense, and inspired by these proposals, we present a model-driven tool to precisely describe machine learning datasets in terms of their structure, data provenance, and social concerns. Our tool aims to facilitate any ML initiative to leverage and benefit from this data-centric shift in ML (e.g., selecting the most appropriate dataset for a new project or better replicating other ML results). The tool is implemented with the Langium workbench as a Visual Studio Code plugin and published as an open-source.},
  keywords   = {Datasets, DescribeML, Domain-Specific Languages (DSLs), Fairness, Model-Driven Engineering (MDE)},
  pubstate   = {published},
  tppubtype  = {conference}
}