diff options
author | Clayton G. Hobbs <clay@lakeserv.net> | 2016-01-02 13:31:42 -0500 |
---|---|---|
committer | Clayton G. Hobbs <clay@lakeserv.net> | 2016-01-02 13:31:42 -0500 |
commit | 93f09d0d9631bb1c90afd8744ca16bf4b7b79e3b (patch) | |
tree | 1f1f8a9b8a5678d32007b0c30bef860a90d01920 | |
parent | 557f0b60cb19b22fd79eb1f81a58139c4942f2c4 (diff) |
Rewrote NumberParser.parse_all_numbers()
Its control flow was confusing before; now it's much more straightforward. We make a string representing classes of words, split that by a regular expression for number words, then parse each number and build up our return string and list. It works just as well as the previous method, is a bit shorter, and I feel that it's clearer as well.
-rw-r--r-- | numberparser.py | 47 |
1 files changed, 22 insertions, 25 deletions
```diff
diff --git a/numberparser.py b/numberparser.py
index a87698d..f02bb6e 100644
--- a/numberparser.py
+++ b/numberparser.py
@@ -139,44 +139,41 @@ class NumberParser:
     def parse_all_numbers(self, text_line):
         nums = []
         t_numless = ''
-        current_num = ''
 
         # Split text_line by commas, whitespace, and hyphens
-        text_line = text_line.strip()
-        text_words = re.split(r'[,\s-]+', text_line)
+        text_words = re.split(r'[,\s-]+', text_line.strip())
+        # Get a string of word classes
+        tw_classes = ''
         for word in text_words:
-            # If we aren't starting a number, add the word to the result string
-            if word not in self.mandatory_number_words:
-                if current_num:
-                    if word in self.number_words:
-                        current_num += word + ' '
-                    else:
-                        try:
-                            nums.append(self.parse_number(current_num))
-                        except ValueError:
-                            nums.append(-1)
-                        current_num = ''
-                        t_numless += '%d' + ' '
-                if not current_num:
-                    t_numless += word + ' '
+            if word in self.mandatory_number_words:
+                tw_classes += 'm'
+            elif word in self.allowed:
+                tw_classes += 'a'
             else:
-                # We're parsing a number now
-                current_num += word + ' '
-        if current_num:
+                tw_classes += 'w'
+
+        # For each string of number words:
+        last_end = 0
+        for m in re.finditer('m[am]*m|m', tw_classes):
+            # Get the number words
+            num_words = ' '.join(text_words[m.start():m.end()])
+            # Parse the number and store the value
             try:
-                nums.append(self.parse_number(current_num))
+                nums.append(self.parse_number(num_words))
             except ValueError:
                 nums.append(-1)
-            current_num = ''
-            t_numless += '%d' + ' '
+            # Add words to t_numless
+            t_numless += ' '.join(text_words[last_end:m.start()]) + ' %d '
+            last_end = m.end()
+        t_numless += ' '.join(text_words[last_end:])
 
         return (t_numless.strip(), nums)
 
 if __name__ == '__main__':
     np = NumberParser()
     # Get the words to translate
-    text_line = input('Enter a number: ')
+    text_line = input('Enter a string: ')
     # Parse it to an integer
     value = np.parse_all_numbers(text_line)
     # Print the result
-    print('I claim that you meant the decimal number', value)
+    print(value)
```