# Source code for pyabsa.utils.data_utils.dataset_item

# -*- coding: utf-8 -*-
# file: dataset_item.py
# time: 02/11/2022 18:56
# author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
# github: https://github.com/yangheng95
# GScholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
# ResearchGate: https://www.researchgate.net/profile/Heng-Yang-17/research
# Copyright (C) 2022. All Rights Reserved.
import os


# To replace the class defined in https://github.com/yangheng95/PyABSA/blob/release/pyabsa/functional/dataset/dataset_manager.py#L18,
# so that the inference script works on a custom dataset.
class DatasetItem(list):
    """A named list of dataset identifiers or file paths.

    The instance itself is the list of items; ``dataset_name`` and ``name``
    both hold the label of the dataset.
    """

    def __init__(self, dataset_name, dataset_items=None):
        """
        Initializes a DatasetItem object with the given dataset_name and dataset_items.

        Each dataset has a name; several datasets can be merged into one by
        passing them as "dataset_items".

        - If dataset_name is a DatasetItem, its ``dataset_name`` and ``name``
          are copied. The items are taken from dataset_items when it is given,
          otherwise they are copied from the source DatasetItem.
        - If dataset_name is a (plain) list, the name is set to
          "Unnamed_Dataset" and the list is used as dataset_items.
        - If dataset_name is an existing path, trailing "/" and "\\" are
          stripped and the name is the basename of that path.
        - If dataset_items is empty or None, the (possibly stripped)
          dataset_name is used as the only item.

        :param dataset_name: The name of the dataset. Can be a string, a list of
            strings or another DatasetItem.
        :param dataset_items: The list of dataset names or file paths (a
            non-list value is stored as a single item). Default is None.
        """
        # Start from an empty list on every construction path.
        super().__init__()
        self.name = None

        # Copy-construct from another DatasetItem.
        if isinstance(dataset_name, DatasetItem):
            self.dataset_name = dataset_name.dataset_name
            self.name = dataset_name.name
            # With no explicit items, reuse the entries of the source object
            # (iterating over the default None would raise TypeError).
            if dataset_items is None:
                dataset_items = dataset_name
            self.extend(dataset_items)
            return

        # A bare list means "these are the items"; fall back to a placeholder name.
        if isinstance(dataset_name, list):
            dataset_items = dataset_name
            dataset_name = "Unnamed_Dataset"

        if os.path.exists(dataset_name):
            # Drop trailing separators so that basename() is not empty for
            # paths such as "data/my_dataset/".
            while dataset_name and dataset_name[-1] in ("/", "\\"):
                dataset_name = dataset_name[:-1]
            self.dataset_name = os.path.basename(dataset_name)
        else:
            self.dataset_name = dataset_name

        # No items given: the dataset name (or stripped path) is the only item.
        if not dataset_items:
            dataset_items = dataset_name

        if isinstance(dataset_items, list):
            self.extend(dataset_items)
        else:
            self.append(dataset_items)

        self.name = self.dataset_name