Remove "-" from tokens using a regular expression

import regex as re
def tokenize(text):
    """Return the substrings of *text* that look like words.

    A match is one word character or hyphen, any number of hyphens, one
    Unicode letter, and then any run of word characters or hyphens.  Every
    token is therefore at least two characters long, and runs made up only
    of digits are never matched.

    The Unicode-property class for letters used in the pattern is not
    supported by the standard-library ``re`` module; this relies on ``re``
    being the third-party ``regex`` package.
    """
    # Raw string so the \w and \p{L} escapes reach the regex engine intact.
    return re.findall(r'[\w-][-]*\p{L}[\w-]*', text)
# Sample sentence containing an apostrophe, hyphens and digits.
text = "let's defeat the SARS-coV-2 delta variant together in 2021!"
tokens = tokenize(text)
# Print the tokens on one line, separated by "|".
print(*tokens, sep="|")

My output looks like this:

let|defeat|the|SARS-coV-2|delta|variant|together|in

I would like to get the following output, with no "-":
|Let|s|defeat|the|SARS|CoV|Delta|variant|together|in

Answer

You can simplify your regex pattern by just using re.split() on the characters that you consider word separators, such as the apostrophe ', whitespace, the dash -, etc.

from itertools import filterfalse
import regex as re

def tokenize(text):
    """Split *text* into tokens on apostrophes, whitespace and hyphens.

    Tokens containing a digit are dropped and the first remaining token is
    capitalized.  Adjacent separators produce empty-string tokens, which
    are kept.

    Returns the list of tokens.
    """
    # Raw strings keep the \s and \d escapes intact for the regex engine.
    splits = re.split(r"['\s-]", text)
    splits = list(filterfalse(lambda value: re.search(r"\d", value), splits))  # Remove this line if you wish to include the digits
    if splits:
        # str.capitalize() also lowercases the rest of the first token.
        splits[0] = splits[0].capitalize()
    return splits

# Same sample sentence as in the question.
text = "let's defeat the SARS-coV-2 delta variant together in 2021!"
tokens = tokenize(text)
joined = "|".join(tokens)
print("|" + joined)  # Remove <"|" +> if you don't intend to put a "|" at the start.

Output:

|Let|s|defeat|the|SARS|coV|delta|variant|together|in