Source code for pyabsa.tasks.__SubtaskTemplate__.dataset_utils.dataset_list

# -*- coding: utf-8 -*-
# file: dataset_list.py
# time: 02/11/2022 19:35
# author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
# github: https://github.com/yangheng95
# GScholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
# ResearchGate: https://www.researchgate.net/profile/Heng-Yang-17/research
# Copyright (C) 2022. All Rights Reserved.

from pyabsa.utils.data_utils.dataset_item import DatasetItem


class APCDatasetList(list):
    """Catalogue of datasets for the aspect polarity classification task.

    Every dataset is exposed as a class attribute holding a ``DatasetItem``
    built from a dataset name and either a single dataset id or a list of
    ids. The datasets come from different sources; the id can be used to
    locate a dataset.

    Instantiating the class yields a ``list`` that contains all of these
    items, in the same order in which they are declared below.
    """

    Laptop14 = DatasetItem("Laptop14", "113.Laptop14")
    Restaurant14 = DatasetItem("Restaurant14", "114.Restaurant14")

    # ARTS test sets: https://github.com/zhijing-jin/ARTS_TestSet
    ARTS_Laptop14 = DatasetItem("ARTS_Laptop14", "111.ARTS_Laptop14")
    ARTS_Restaurant14 = DatasetItem("ARTS_Restaurant14", "112.ARTS_Restaurant14")

    Restaurant15 = DatasetItem("Restaurant15", "115.Restaurant15")
    Restaurant16 = DatasetItem("Restaurant16", "116.Restaurant16")

    # Twitter
    ACL_Twitter = DatasetItem("Twitter", "101.ACL_Twitter")

    MAMS = DatasetItem("MAMS", "109.MAMS")

    # @R Mukherjee et al.
    Television = DatasetItem("Television", "117.Television")
    TShirt = DatasetItem("TShirt", "118.TShirt")

    # @WeiLi9811 https://github.com/WeiLi9811
    Yelp = DatasetItem("Yelp", "119.Yelp")

    # Chinese (binary polarity)
    Phone = DatasetItem("Phone", "107.Phone")
    Car = DatasetItem("Car", "104.Car")
    Notebook = DatasetItem("Notebook", "106.Notebook")
    Camera = DatasetItem("Camera", "103.Camera")

    # Chinese (triple polarity)
    # brightgems@github https://github.com/brightgems
    # Note that the annotation strategy of this dataset is highly different
    # from other datasets; please don't mix this dataset with any other
    # dataset in the trainer.
    Shampoo = DatasetItem("Shampoo", "108.Shampoo")

    # jmc123@github https://github.com/jmc-123
    MOOC = DatasetItem("MOOC", "105.MOOC")
    MOOC_En = DatasetItem("MOOC_En", "121.MOOC_En")

    # https://www.kaggle.com/datasets/cf7394cb629b099cf94f3c3ba87e1d37da7bfb173926206247cd651db7a8da07
    Kaggle = DatasetItem("Kaggle", "129.Kaggle")

    Chinese_Zhang = DatasetItem("Chinese_Zhang", ["130.Chinese_Zhang"])

    # Assembled datasets: each item below groups several dataset ids.
    Chinese = DatasetItem(
        "Chinese",
        [
            "107.Phone",
            "103.Camera",
            "106.Notebook",
            "104.Car",
            "105.MOOC",
            "130.Chinese_Zhang",
        ],
    )
    # NOTE(review): this item is created with the same name string
    # ("Chinese") as the assembled ``Chinese`` item above -- confirm that the
    # shared name is intentional.
    Binary_Polarity_Chinese = DatasetItem(
        "Chinese",
        ["107.Phone", "103.Camera", "106.Notebook", "104.Car"],
    )
    Triple_Polarity_Chinese = DatasetItem("Chinese3way", ["105.MOOC"])

    SemEval2016Task5 = DatasetItem("SemEval2016Task5", ["120.SemEval2016Task5"])
    Arabic_SemEval2016Task5 = DatasetItem("Arabic_SemEval2016Task5", ["122.Arabic"])
    Dutch_SemEval2016Task5 = DatasetItem("Dutch_SemEval2016Task5", ["123.Dutch"])
    Spanish_SemEval2016Task5 = DatasetItem("Spanish_SemEval2016Task5", ["127.Spanish"])
    Turkish_SemEval2016Task5 = DatasetItem("Turkish_SemEval2016Task5", ["128.Turkish"])
    Russian_SemEval2016Task5 = DatasetItem("Russian_SemEval2016Task5", ["126.Russian"])
    French_SemEval2016Task5 = DatasetItem("French_SemEval2016Task5", ["125.French"])
    English_SemEval2016Task5 = DatasetItem("English_SemEval2016Task5", ["124.English"])

    English = DatasetItem(
        "English",
        [
            "113.Laptop14",
            "114.Restaurant14",
            "116.Restaurant16",
            "101.ACL_Twitter",
            "109.MAMS",
            "117.Television",
            "118.TShirt",
            "119.Yelp",
            "121.MOOC_En",
            "129.Kaggle",
        ],
    )

    # The rest15 dataset is left out of the groups below due to data leakage,
    # see https://github.com/yangheng95/PyABSA/issues/53
    SemEval = DatasetItem(
        "SemEval",
        ["113.Laptop14", "114.Restaurant14", "116.Restaurant16"],
    )
    Restaurant = DatasetItem("Restaurant", ["114.Restaurant14", "116.Restaurant16"])

    Multilingual = DatasetItem(
        "Multilingual",
        [
            "113.Laptop14",
            "114.Restaurant14",
            "116.Restaurant16",
            "101.ACL_Twitter",
            "109.MAMS",
            "117.Television",
            "118.TShirt",
            "119.Yelp",
            "107.Phone",
            "103.Camera",
            "106.Notebook",
            "104.Car",
            "105.MOOC",
            "129.Kaggle",
            "120.SemEval2016Task5",
            "121.MOOC_En",
            "130.Chinese_Zhang",
        ],
    )

    def __init__(self):
        """Fill the list with every dataset item declared on the class."""
        # Explicit enumeration keeps the list order equal to the declaration
        # order of the class attributes above.
        registered_items = [
            self.Laptop14,
            self.Restaurant14,
            self.ARTS_Laptop14,
            self.ARTS_Restaurant14,
            self.Restaurant15,
            self.Restaurant16,
            self.ACL_Twitter,
            self.MAMS,
            self.Television,
            self.TShirt,
            self.Yelp,
            self.Phone,
            self.Car,
            self.Notebook,
            self.Camera,
            self.Shampoo,
            self.MOOC,
            self.MOOC_En,
            self.Kaggle,
            self.Chinese_Zhang,
            self.Chinese,
            self.Binary_Polarity_Chinese,
            self.Triple_Polarity_Chinese,
            self.SemEval2016Task5,
            self.Arabic_SemEval2016Task5,
            self.Dutch_SemEval2016Task5,
            self.Spanish_SemEval2016Task5,
            self.Turkish_SemEval2016Task5,
            self.Russian_SemEval2016Task5,
            self.French_SemEval2016Task5,
            self.English_SemEval2016Task5,
            self.English,
            self.SemEval,
            self.Restaurant,
            self.Multilingual,
        ]
        super().__init__(registered_items)