From fdaf5093262caf5a208c0645a0d64358e48c8f6e Mon Sep 17 00:00:00 2001
From: Sebastian Ruder
Date: Sun, 23 Feb 2020 22:55:37 +0000
Subject: [PATCH] Added reading comprehension datasets for French and Russian

---
 README.md                     | 26 +++++++++++++++++---------
 french/question_answering.md  | 32 ++++++++++++++++++++++++++++++++
 russian/question_answering.md | 25 +++++++++++++++++++++++++
 3 files changed, 74 insertions(+), 9 deletions(-)
 create mode 100644 french/question_answering.md
 create mode 100644 russian/question_answering.md

diff --git a/README.md b/README.md
index d14b4211..d26f7f74 100644
--- a/README.md
+++ b/README.md
@@ -41,10 +41,13 @@
 - [Text classification](english/text_classification.md)
 - [Word sense disambiguation](english/word_sense_disambiguation.md)
 
-### Chinese
+### Vietnamese
 
-- [Entity linking](chinese/chinese.md#entity-linking)
-- [Chinese word segmentation](chinese/chinese_word_segmentation.md)
+- [Dependency parsing](vietnamese/vietnamese.md#dependency-parsing)
+- [Machine translation](vietnamese/vietnamese.md#machine-translation)
+- [Named entity recognition](vietnamese/vietnamese.md#named-entity-recognition)
+- [Part-of-speech tagging](vietnamese/vietnamese.md#part-of-speech-tagging)
+- [Word segmentation](vietnamese/vietnamese.md#word-segmentation)
 
 ### Hindi
 
@@ -52,13 +55,18 @@
 - [Part-of-speech tagging](hindi/hindi.md#part-of-speech-tagging)
 - [Machine Translation](hindi/hindi.md#machine-translation)
 
-### Vietnamese
+### Chinese
 
-- [Dependency parsing](vietnamese/vietnamese.md#dependency-parsing)
-- [Machine translation](vietnamese/vietnamese.md#machine-translation)
-- [Named entity recognition](vietnamese/vietnamese.md#named-entity-recognition)
-- [Part-of-speech tagging](vietnamese/vietnamese.md#part-of-speech-tagging)
-- [Word segmentation](vietnamese/vietnamese.md#word-segmentation)
+- [Entity linking](chinese/chinese.md#entity-linking)
+- [Chinese word segmentation](chinese/chinese_word_segmentation.md)
+
+### French
+
+- [Question answering](french/question_answering.md)
+
+### Russian
+
+- [Question answering](russian/question_answering.md)
 
 ### Spanish
 
diff --git a/french/question_answering.md b/french/question_answering.md
new file mode 100644
index 00000000..451d96c9
--- /dev/null
+++ b/french/question_answering.md
@@ -0,0 +1,32 @@
+# Question answering
+
+Question answering is the task of producing an answer to a question posed in natural language.
+
+### Table of contents
+
+- [Reading comprehension](#reading-comprehension)
+  - [FQuAD](#fquad)
+
+## Reading comprehension
+
+### FQuAD
+
+The [French Question Answering dataset (FQuAD)](https://arxiv.org/abs/2002.06071) is a
+reading comprehension dataset in the style of SQuAD. It consists of 25k questions on
+Wikipedia articles. The dataset is available [here](https://fquad.illuin.tech/).
+
+Example:
+
+| Document | Question | Answer |
+| ------------- | -----:| -----: |
+| Des observations de 2015 par la sonde Dawn ont confirmé qu'elle possède une forme sphérique, à la différence des corps plus petits qui ont une forme irrégulière. [...] | A quand remonte les observations faites par la sonde Dawn ? | 2015 |
+
+| Model | F1 | EM | Paper |
+| ------------- | :-----:| :-----:| --- |
+| Human performance | 92.1 | 78.4 | [FQuAD: French Question Answering Dataset](https://arxiv.org/abs/2002.06071) |
+| CamemBERTQA (d'Hoffschmidt et al., 2020)* | 88.0 | 77.9 | [FQuAD: French Question Answering Dataset](https://arxiv.org/abs/2002.06071) |
+| CamemBERTQA (d'Hoffschmidt et al., 2020)† | 84.1 | 70.9 | [FQuAD: French Question Answering Dataset](https://arxiv.org/abs/2002.06071) |
+
+*: trained on the FQuAD training set
+
+†: trained on the SQuAD training set and zero-shot transferred to the FQuAD test set.
\ No newline at end of file
diff --git a/russian/question_answering.md b/russian/question_answering.md
new file mode 100644
index 00000000..4fada945
--- /dev/null
+++ b/russian/question_answering.md
@@ -0,0 +1,25 @@
+# Question answering
+
+Question answering is the task of producing an answer to a question posed in natural language.
+
+### Table of contents
+
+- [Reading comprehension](#reading-comprehension)
+  - [SberQuAD](#sberquad)
+
+
+## Reading comprehension
+
+### SberQuAD
+
+The [Sberbank Question Answering dataset (SberQuAD)](https://arxiv.org/abs/1912.09723) is a reading comprehension dataset
+in the style of SQuAD, created by Sberbank as part of a 2017 competition. The data consists of around 50k
+questions on Wikipedia.
+
+Because the original SberQuAD development set is not available, the DeepPavlov team partitioned the original
+SberQuAD training set into new training (45,328 questions) and test (5,036 questions) sets.
+
+| Model | F1 | EM | Paper |
+| ------------- | :-----:| :-----:| --- |
+| BERT (Efimov et al., 2019) | 84.8 | 66.6 | [SberQuAD - Russian Reading Comprehension Dataset: Description and Analysis](https://arxiv.org/abs/1912.09723) |
+| DocQA (Efimov et al., 2019) | 79.5 | 59.6 | [SberQuAD - Russian Reading Comprehension Dataset: Description and Analysis](https://arxiv.org/abs/1912.09723) |
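The F1 and EM columns in the leaderboards added by this patch follow the SQuAD evaluation convention: Exact Match checks whether the normalized predicted answer string equals the gold answer, and F1 is computed over overlapping answer tokens. The sketch below is a simplified illustration of those two metrics, not the official FQuAD or SberQuAD evaluation scripts; in particular, the ASCII-only punctuation stripping here is an assumption, and the official scripts add language-specific normalization (e.g. article removal).

```python
import string
from collections import Counter


def normalize(text: str) -> str:
    """Lowercase, drop ASCII punctuation, and collapse whitespace."""
    text = text.lower()
    text = "".join(ch for ch in text if ch not in string.punctuation)
    return " ".join(text.split())


def exact_match(prediction: str, gold: str) -> float:
    """1.0 if the normalized strings are identical, else 0.0."""
    return float(normalize(prediction) == normalize(gold))


def f1_score(prediction: str, gold: str) -> float:
    """Token-level F1 between the predicted and gold answer spans."""
    pred_tokens = normalize(prediction).split()
    gold_tokens = normalize(gold).split()
    # Multiset intersection counts each shared token at most
    # as often as it appears in both answers.
    common = Counter(pred_tokens) & Counter(gold_tokens)
    num_same = sum(common.values())
    if num_same == 0:
        return 0.0
    precision = num_same / len(pred_tokens)
    recall = num_same / len(gold_tokens)
    return 2 * precision * recall / (precision + recall)
```

For the FQuAD example above, a prediction of "2015" scores EM = 1.0 and F1 = 1.0 against the gold answer "2015", while a looser prediction such as "in 2015" drops EM to 0.0 but keeps a partial F1 credit.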