# -*- coding: utf-8 -*-
import re
from collections import Counter, OrderedDict

import numpy as np
import tqdm
from pyabsa.tasks.AspectPolarityClassification.dataset_utils.__lcf__.apc_utils import (
from ..dataset_utils.aste_utils import (

from pyabsa.utils.file_utils.file_utils import load_dataset_from_file
from pyabsa.utils.pyabsa_utils import fprint

# Two-way lookup between label strings and their integer ids; ids follow the
# position of each label in ``labels`` (defined outside this excerpt).
label_to_index = OrderedDict()
index_to_label = OrderedDict()
for i, v in enumerate(labels):
    index_to_label[i] = v
    label_to_index[v] = i
# Constructor. From the statements that are visible it:
#   - builds the parser with configure_spacy_model(config) and keeps it on self.nlp,
#   - stores config, tokenizer and dataset_type on the instance,
#   - copies self.label_to_index / self.index_to_label onto config and sets
#     config.output_dim to the number of entries in label_to_index.
# NOTE(review): this line is extraction-damaged. The whole body is collapsed onto
# the ``def`` line and the target of the leading ``= None`` assignment is missing,
# so it is not valid Python as written. Restore the missing attribute name and the
# original line breaks before relying on this method - TODO confirm against the
# upstream file.
def __init__(self, config, tokenizer, dataset_type="train"): = None self.nlp = configure_spacy_model(config) self.config = config self.tokenizer = tokenizer self.dataset_type = dataset_type self.config.label_to_index = self.label_to_index self.config.index_to_label = self.index_to_label self.config.output_dim = len(self.label_to_index)
def get_dependencies(self, tokens):
    """Return POS tags, head indices and dependency labels for a sentence.

    The tokens are joined with single spaces and the resulting string is
    parsed by ``self.nlp``. One entry is produced for every token of the
    parsed document.

    Args:
        tokens: Sequence of token strings making up one sentence.

    Returns:
        A ``(postags, heads, deprels)`` tuple of three parallel lists:
        ``pos_`` of each parsed token, ``head.i`` of each parsed token, and
        ``dep_`` of each parsed token.
    """
    # The previous version first built a ``placeholder_tokens`` list (wrapping
    # tokens with special characters as ``__tok__``) but never read it: the
    # parser was always fed the raw tokens. That dead loop is removed; the
    # input to the parser and the returned values are unchanged.
    # NOTE(review): the lists follow the parser's own tokenisation of the
    # joined string, so their length may differ from ``len(tokens)`` if the
    # parser splits tokens further - confirm callers account for this.
    doc = self.nlp(" ".join(tokens))
    postags = [word.pos_ for word in doc]
    heads = [word.head.i for word in doc]
    deprels = [word.dep_ for word in doc]
    return postags, heads, deprels
