From: noah morrison <noah@morrison.ph>
Date: Tue, 23 Dec 2014 03:57:23 +0000 (-0500)
Subject: Split tokenizer up into helper functions
X-Git-Url: https://git.devinivas.org/?a=commitdiff_plain;h=dbccec5aa32313657ff38df13f40487a8bff0bca;p=chevron.git

Split tokenizer up into helper functions

In an attempt to make it more readable, and seperate parsing
logic from tokenizing logic.
---

diff --git a/chevron/tokenizer.py b/chevron/tokenizer.py
index 9a8adaf..95a4986 100644
--- a/chevron/tokenizer.py
+++ b/chevron/tokenizer.py
@@ -1,6 +1,112 @@
 #!/usr/bin/python
 
 
+#
+# Helper functions
+#
+
+def grab_literal(template, l_del):
+    """Parse a literal from the template"""
+
+    try:
+        # Look for the next tag and move the template to it
+        literal, template = template.split(l_del, 1)
+        return (literal, template)
+
+    # There are no more tags in the template?
+    except ValueError:
+        # Then the rest of the template is a literal
+        return (template, '')
+
+
+def l_sa_check(template, literal, is_standalone):
+    """Do a preliminary check to see if a tag could be a standalone"""
+
+    # If there is a newline, or the previous tag was a standalone
+    if literal.find('\n') != -1 or is_standalone:
+        padding = literal.split('\n')[-1]
+
+        # If all the characters since the last newline are spaces
+        if padding.isspace() or padding == '':
+            # Then the next tag could be a standalone
+            return True
+        else:
+            # Otherwise it can't be
+            return False
+
+
+def r_sa_check(template, tag_type, is_standalone):
+    """Do a final checkto see if a tag could be a standalone"""
+
+    # Check right side if we might be a standalone
+    if is_standalone and tag_type not in ['variable', 'no escape']:
+        on_newline = template.split('\n', 1)
+
+        # If the stuff to the right of us are spaces we're a standalone
+        if on_newline[0].isspace() or not on_newline[0]:
+            return True
+        else:
+            return False
+
+    # If we're a tag can't be a standalone
+    else:
+        return False
+
+
+def parse_tag(template, l_del, r_del):
+    """Parse a tag from a template"""
+
+    tag_types = {
+        '!': 'comment',
+        '#': 'section',
+        '^': 'inverted section',
+        '/': 'end',
+        '>': 'partial',
+        '=': 'set delimiter?',
+        '{': 'no escape?',
+        '&': 'no escape'
+    }
+
+    # Get the tag
+    tag, template = template.split(r_del, 1)
+
+    # Find the type meaning of the first character
+    tag_type = tag_types.get(tag[0], 'variable')
+
+    # If the type is not a variable
+    if tag_type != 'variable':
+        # Then that first character is not needed
+        tag = tag[1:]
+
+    # If we might be a set delimiter tag
+    if tag_type == 'set delimiter?':
+        # Double check to make sure we are
+        if tag.endswith('='):
+            tag_type = 'set delimiter'
+            # Remove the equal sign
+            tag = tag[:-1]
+
+        # Otherwise we should complain
+        else:
+            raise SyntaxError('Unmatched set delimiter tag')
+
+    # If we might be a no html escape tag
+    elif tag_type == 'no escape?':
+        # And we have a third curly brace
+        # (And are using curly braces as delimiters)
+        if l_del == '{{' and r_del == '}}' and template.startswith('}'):
+            # Then we are a no html escape tag
+            template = template[1:]
+            tag_type = 'no escape'
+
+    # Strip the whitespace off the key and return
+    return ((tag_type, tag.strip()), template)
+
+
+#
+# The main tokenizing function
+#
+
 def tokenize(template, def_ldel='{{', def_rdel='}}'):
     """Tokenize a mustache template
 
@@ -38,17 +144,6 @@ def tokenize(template, def_ldel='{{', def_rdel='}}'):
     the literal itself.
     """
 
-    tag_types = {
-        '!': 'comment',
-        '#': 'section',
-        '^': 'inverted section',
-        '/': 'end',
-        '>': 'partial',
-        '=': 'set delimiter?',
-        '{': 'no escape?',
-        '&': 'no escape'
-    }
-
     # If the template is a file-like object then read it
     try:
         template = template.read()
@@ -61,15 +156,7 @@ def tokenize(template, def_ldel='{{', def_rdel='}}'):
     r_del = def_rdel
 
     while template:
-
-        try:
-            # Look for the next tag and move the template to it
-            literal, template = template.split(l_del, 1)
-
-        # There are no more tags in the template?
-        except ValueError:
-            # Then the rest of the template is a literal
-            literal, template = (template, '')
+        literal, template = grab_literal(template, l_del)
 
         # If the template is completed
         if not template:
@@ -77,48 +164,20 @@ def tokenize(template, def_ldel='{{', def_rdel='}}'):
             yield ('literal', literal)
             break
 
-        # Checking if the next tag could be a standalone
-        # If there is a newline, or the previous tag was a standalone
-        if literal.find('\n') != -1 or is_standalone:
-            padding = literal.split('\n')[-1]
-
-            # If all the characters since the last newline are spaces
-            if padding.isspace() or padding == '':
-                # Then the next tag could be a standalone
-                is_standalone = True
-            else:
-                # Otherwise it can't be
-                is_standalone = False
-
-        # Start work on the tag
-        # Find the type meaning of the first character
-        tag_type = tag_types.get(template[0], 'variable')
-
-        # If the type is not a variable
-        if tag_type != 'variable':
-            # Then that first character is not needed
-            template = template[1:]
+        # Do the first check to see if we could be a standalone
+        is_standalone = l_sa_check(template, literal, is_standalone)
+
+        # Parse the tag
+        tag, template = parse_tag(template, l_del, r_del)
+        tag_type, tag_key = tag
+
+        # Special tag logic
 
-        # Grab and strip the whitespace off the key
-        tag_key, template = template.split(r_del, 1)
-        tag_key = tag_key.strip()
-
-        # If we might be a no html escape tag
-        if tag_type == 'no escape?':
-            # And we have a third curly brace
-            # (And are using curly braces as delimiters)
-            if template[0] == '}' and l_del == '{{' and r_del == '}}':
-                # Then we are a no html escape tag
-                template = template[1:]
-                tag_type = 'no escape'
-
-        # If we might be a set delimiter tag
-        elif tag_type == 'set delimiter?':
-            # If our key ends with an equal sign
-            if tag_key.endswith('='):
-                # Then get and set the delimiters
-                dels = tag_key[:-1].strip().split(' ')
-                l_del, r_del = dels[0], dels[-1]
+        # If we are a set delimiter tag
+        if tag_type == 'set delimiter':
+            # Then get and set the delimiters
+            dels = tag_key.strip().split(' ')
+            l_del, r_del = dels[0], dels[-1]
 
         # If we are a section tag
         elif tag_type in ['section', 'inverted section']:
@@ -135,23 +194,18 @@ def tokenize(template, def_ldel='{{', def_rdel='}}'):
                 raise SyntaxError('End tag does not match '
                                   'the currently opened section')
 
-        # Check right side if we might be a standalone
-        if is_standalone and tag_type not in ['variable', 'no escape']:
-            on_newline = template.split('\n', 1)
-
-            # If the stuff to the right of us are spaces we're a standalone
-            if on_newline[0].isspace() or not on_newline[0]:
-                # Remove the stuff before the newline
-                template = on_newline[-1]
-                if tag_type != 'partial':
-                    # Then we need to remove the spaces from the left
-                    literal = literal.rstrip(' ')
-            else:
-                is_standalone = False
-
-        # If we're a tag can't be a standalone
-        else:
-            is_standalone = False
+        # Do the second check to see if we're a standalone
+        is_standalone = r_sa_check(template, tag_type, is_standalone)
+
+        # Which if we are
+        if is_standalone:
+            # Remove the stuff before the newline
+            template = template.split('\n', 1)[-1]
+
+            # Partials need to keep the spaces on their left
+            if tag_type != 'partial':
+                # But other tags don't
+                literal = literal.rstrip(' ')
 
         # Start yielding
         # Ignore literals that are empty