@inproceedings{jin-ji-2024-schema,
title = "Schema-based Data Augmentation for Event Extraction",
author = "Jin, Xiaomeng and
Ji, Heng",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "http://aclanthology.org/2024.lrec-main.1253/",
pages = "14382--14392",
abstract = "Event extraction is a crucial task for semantic understanding and structured knowledge construction. However, the expense of collecting and labeling data for training event extraction models is usually high. To address this issue, we propose a novel schema-based data augmentation method that utilizes event schemas to guide the data generation process. The event schemas depict the typical patterns of complex events and can be used to create new synthetic data for event extraction. Specifically, we sub-sample from the schema graph to obtain a subgraph, instantiate the schema subgraph, and then convert the instantiated subgraph to natural language texts. We conduct extensive experiments on event trigger detection, event trigger extraction, and event argument extraction tasks using two datasets (including five scenarios). The experimental results demonstrate that our proposed data-augmentation method produces high-quality generated data and significantly enhances the model performance, with up to 12{\%} increase in F1 score compared to baseline methods."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jin-ji-2024-schema">
<titleInfo>
<title>Schema-based Data Augmentation for Event Extraction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xiaomeng</namePart>
<namePart type="family">Jin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heng</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Event extraction is a crucial task for semantic understanding and structured knowledge construction. However, the expense of collecting and labeling data for training event extraction models is usually high. To address this issue, we propose a novel schema-based data augmentation method that utilizes event schemas to guide the data generation process. The event schemas depict the typical patterns of complex events and can be used to create new synthetic data for event extraction. Specifically, we sub-sample from the schema graph to obtain a subgraph, instantiate the schema subgraph, and then convert the instantiated subgraph to natural language texts. We conduct extensive experiments on event trigger detection, event trigger extraction, and event argument extraction tasks using two datasets (including five scenarios). The experimental results demonstrate that our proposed data-augmentation method produces high-quality generated data and significantly enhances the model performance, with up to 12% increase in F1 score compared to baseline methods.</abstract>
<identifier type="citekey">jin-ji-2024-schema</identifier>
<location>
<url>http://aclanthology.org/2024.lrec-main.1253/</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>14382</start>
<end>14392</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Schema-based Data Augmentation for Event Extraction
%A Jin, Xiaomeng
%A Ji, Heng
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F jin-ji-2024-schema
%X Event extraction is a crucial task for semantic understanding and structured knowledge construction. However, the expense of collecting and labeling data for training event extraction models is usually high. To address this issue, we propose a novel schema-based data augmentation method that utilizes event schemas to guide the data generation process. The event schemas depict the typical patterns of complex events and can be used to create new synthetic data for event extraction. Specifically, we sub-sample from the schema graph to obtain a subgraph, instantiate the schema subgraph, and then convert the instantiated subgraph to natural language texts. We conduct extensive experiments on event trigger detection, event trigger extraction, and event argument extraction tasks using two datasets (including five scenarios). The experimental results demonstrate that our proposed data-augmentation method produces high-quality generated data and significantly enhances the model performance, with up to 12% increase in F1 score compared to baseline methods.
%U http://aclanthology.org/2024.lrec-main.1253/
%P 14382-14392
Markdown (Informal)
[Schema-based Data Augmentation for Event Extraction](http://aclanthology.org/2024.lrec-main.1253/) (Jin & Ji, LREC-COLING 2024)
ACL
- Xiaomeng Jin and Heng Ji. 2024. Schema-based Data Augmentation for Event Extraction. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 14382–14392, Torino, Italia. ELRA and ICCL.