Problem description
I am trying to take an input such as "Democracy is overrated." and return "democr_acy is overrat_ed".
sentence = input()
suffixes = ["acy", "tion", "ate", "er", "fy", "ize", "able", "ible", "al", "esque", "ful", "ic", "ous", "ish", "ive", "less", "ed"]
for pattern in suffixes:
    if pattern in sentence:
        out = ''
        par = sentence.partition(pattern)
        while par[1]:
            out += ' _'.join([par[0], par[1]])
            remainder = par[2]
            par = par[2].partition(pattern)
        sentence = ''.join([out, remainder])
print(''.join([out, remainder]))
As you can see, my output is "democr _acy is ov _err _at _ed". I understand that I have to look for a suffix at the end of a word and only split on a meaningful suffix. For that I thought sentence.endswith might work, but I am really not sure how to go about it :(
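For reference, the mid-word splits come from the fact that str.partition matches the first occurrence of the pattern anywhere in the string, while str.endswith only checks the end:

print("overrated".partition("er"))   # ('ov', 'er', 'rated')
print("overrated".endswith("er"))    # False
print("overrated".endswith("ed"))    # True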
Solution
suffixes = ["acy","tion","ate","er","fy","ize","able","ible","al","esque","ful","ic","ous","ish","ive","less","ed"]
def suffixize(sentence):
words = []
# Split the original sentence in spaces and iterate over each word
for word in sentence.split(' '):
# If this word ends with some suffix,return this suffix,else return None
suffix = next((suffix for suffix in suffixes if word.endswith(suffix)),None)
# If this word does not end with any suffix
if suffix is None:
# Leave it as it is
words.append(word)
else:
# Remove the suffix from the word,append a _ and add the suffix
words.append(f'{word[:-len(suffix)]}_{suffix}')
# Join the words using a space
return ' '.join(words)
print(suffixize("Democracy is overrated"))
# Output: 'Democr_acy is overrat_ed'
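A quick aside on the next(...) call used above: it returns the first suffix produced by the generator expression, and falls back to the None default when no suffix matches. For example:

print(next((s for s in ["acy", "ed"] if "overrated".endswith(s)), None))  # ed
print(next((s for s in ["acy", "ed"] if "python".endswith(s)), None))     # None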
Note that this does not preserve the whitespace from the original input. That may or may not be what you want.
To fix this, you can use the re module:
suffixes = ["acy","ed"]
import re
# Use named groups to differentiate between words and whitespace
pattern = re.compile(r'(?P<w>\w+)|(?P<s>\W+)')
def suffixize(sentence):
tokens = []
for match in pattern.finditer(sentence):
word = match.group()
# If it's a white-space
if match.lastgroup == 's':
# Just add to tokens
tokens.append(word)
else:
# Do the suffix search
suffix = next((suffix for suffix in suffixes if word.endswith(suffix)),None)
if suffix is None:
tokens.append(word)
else:
tokens.append(f'{word[:-len(suffix)]}_{suffix}')
return ''.join(tokens)
print(suffixize("Democracy is overrated"))
# Output: 'Democr_acy is overrat_ed'
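Because the regex keeps runs of non-word characters (spaces, tabs, punctuation) as their own tokens, the original spacing and punctuation survive unchanged. For example, with the two-suffix list above:

print(repr(suffixize("Democracy,  is\toverrated!")))
# Output: 'Democr_acy,  is\toverrat_ed!'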
If suffixes is your list of suffixes, then:
sentence = "democracy is overrated"
out = []
for word in sentence.split():
for s in suffixes:
if word.endswith(s):
word = word[: len(word) - len(s)] + "_" + word[len(word) - len(s) :]
break
out.append(word)
print(" ".join(out))
This prints:
democr_acy is overrat_ed
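If you want this loop as a reusable function fed from input(), the way the original script reads its sentence, a small wrapper could look like the sketch below (the name mark_suffixes is purely illustrative):

def mark_suffixes(sentence, suffixes):
    out = []
    for word in sentence.split():
        for s in suffixes:
            if word.endswith(s):
                word = word[: len(word) - len(s)] + "_" + word[len(word) - len(s):]
                break
        out.append(word)
    return " ".join(out)

print(mark_suffixes(input(), ["acy", "tion", "ate", "er", "fy", "ize", "able", "ible",
                              "al", "esque", "ful", "ic", "ous", "ish", "ive", "less", "ed"]))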
Here is another way to do it, without using any imports:
suffixes = ["acy","ed"]
def split_alpha(sentence):
words = []
# Create an alphabet with words
alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
alphabet += alphabet.lower()
# Store the current word being iterated
current_word = None
# Store if the current word is a alphabetical word
is_alpha = False
for char in sentence:
is_current_alpha = char in alphabet
# If a word is not defined yet
if current_word is None:
# Create a new word with the current char
current_word = char
is_alpha = is_current_alpha
else:
# If the current word has the same
# 'alphabeticity' of the current char
if is_current_alpha == is_alpha:
current_word += char
else:
# Apprend the previous word to `words`
words.append(current_word)
# Create a new word with the current char
current_word = char
is_alpha = is_current_alpha
if current_word is not None:
words.append(current_word)
return words
def suffixize(sentence):
    # Split the sentence into alternating alphabetical / non-alphabetical chunks
    tokens = split_alpha(sentence)
    out = []
    for word in tokens:
        # If this chunk ends with some suffix, return that suffix, else None
        suffix = next((suffix for suffix in suffixes if word.endswith(suffix)), None)
        if suffix is None:
            out.append(word)
        else:
            # Remove the suffix from the chunk, append a _ and add the suffix back
            out.append(f'{word[:-len(suffix)]}_{suffix}')
    # Join without a separator: whitespace and punctuation are already their own chunks
    return ''.join(out)
assert split_alpha("abc") == ["abc"]
assert split_alpha(" ") == [" "]
assert split_alpha("a1b2c3") == ["a","1","b","2","c","3"]
assert split_alpha("hey there") == ["hey"," ","there"]
assert split_alpha("democracy,is overrated!") == ["democracy",","is","overrated","!"]
assert suffixize("Democracy is overrated") == 'Democr_acy is overrat_ed'
assert suffixize("democracy,is overrated!") == 'democr_acy,is overrat_ed!'