
Publikationen des Projekts
Jomaa, Hadi S.; Schmidt-Thieme, Lars; Grabocka, Josif Dataset2Vec: Learning Dataset Meta-Features Journal Article In: Data Mining and Knowledge Discovery, 10618 (0737), pp. 22, 2021. Abstract | Links | BibTeX | Schlagwörter: Hyperparameter Optimization, Meta-feature Learning, Meta-learning
2021
% Journal version of the Dataset2Vec paper (arXiv preprint 1905.11063).
% NOTE(review): the exported record carried DOI fragments in volume/number
% (10618 is the journal id, 0737 the article id in the DOI) and a page count
% in pages; these are replaced with the published issue data. Confirm
% volume, issue and page range against the publisher record for the DOI.
% The placeholder date 2021-01-01 duplicated year and implied a false
% publication day, so only year is kept.
@article{HadiS.Jomaa,
  author     = {Jomaa, Hadi S. and Schmidt-Thieme, Lars and Grabocka, Josif},
  title      = {{Dataset2Vec}: Learning Dataset Meta-Features},
  journal    = {Data Mining and Knowledge Discovery},
  volume     = {35},
  number     = {3},
  pages      = {964--985},
  year       = {2021},
  doi        = {10.1007/s10618-021-00737-9},
  url        = {https://arxiv.org/abs/1905.11063},
  eprint     = {1905.11063},
  eprinttype = {arXiv},
  abstract   = {Meta-learning, or learning to learn, is a machine learning approach that utilizes prior learning experiences to expedite the learning process on unseen tasks. As a data-driven approach, meta-learning requires meta-features that represent the primary learning tasks or datasets, and are estimated traditionally as engineered dataset statistics that require expert domain knowledge tailored for every meta-task. In this paper, first, we propose a meta-feature extractor called Dataset2Vec that combines the versatility of engineered dataset meta-features with the expressivity of meta-features learned by deep neural networks. Primary learning tasks or datasets are represented as hierarchical sets, i.e., as a set of sets, esp. as a set of predictor/target pairs, and then a DeepSet architecture is employed to regress meta-features on them. Second, we propose a novel auxiliary meta-learning task with abundant data called dataset similarity learning that aims to predict if two batches stem from the same dataset or different ones. In an experiment on a large-scale hyperparameter optimization task for 120 UCI datasets with varying schemas as a meta-learning task, we show that the meta-features of Dataset2Vec outperform the expert engineered meta-features and thus demonstrate the usefulness of learned meta-features for datasets with varying schemas for the first time.},
  keywords   = {Hyperparameter Optimization, Meta-feature Learning, Meta-learning},
  pubstate   = {published},
  tppubtype  = {article}
}