class Solution:
def isValid(self, code: str) -> bool:
stack = []
i = 0
n = len(code)
while i < n:
if code.startswith("<![CDATA[", i):
j = code.find("]]>", i)
if j == -1:
return False
i = j + 3
elif code.startswith("</", i):
j = code.find(">", i)
if j == -1:
return False
tag = code[i+2:j]
if not stack or stack[-1] != tag:
return False
stack.pop()
i = j + 1
if not stack and i != n:
return False
elif code.startswith("<", i):
j = code.find(">", i)
if j == -1:
return False
tag = code[i+1:j]
if not (1 <= len(tag) <= 9) or not tag.isupper():
return False
stack.append(tag)
i = j + 1
else:
if not stack:
return False
i += 1
return not stack
class Solution {
public:
bool isValid(string code) {
stack<string> st;
int i = 0, n = code.size();
while (i < n) {
if (code.substr(i, 9) == "<![CDATA[") {
int j = code.find("]]>", i);
if (j == string::npos) return false;
i = j + 3;
} else if (code.substr(i, 2) == "</") {
int j = code.find(">", i);
if (j == string::npos) return false;
string tag = code.substr(i+2, j-i-2);
if (st.empty() || st.top() != tag) return false;
st.pop();
i = j + 1;
if (st.empty() && i != n) return false;
} else if (code[i] == '<') {
int j = code.find(">", i);
if (j == string::npos) return false;
string tag = code.substr(i+1, j-i-1);
if (tag.empty() || tag.size() > 9 || tag.size() < 1) return false;
for (char c : tag) if (c < 'A' || c > 'Z') return false;
st.push(tag);
i = j + 1;
} else {
if (st.empty()) return false;
i++;
}
}
return st.empty();
}
};
class Solution {
public boolean isValid(String code) {
Stack<String> stack = new Stack<>();
int i = 0, n = code.length();
while (i < n) {
if (code.startsWith("<![CDATA[", i)) {
int j = code.indexOf("]]>", i);
if (j == -1) return false;
i = j + 3;
} else if (code.startsWith("</", i)) {
int j = code.indexOf(">", i);
if (j == -1) return false;
String tag = code.substring(i+2, j);
if (stack.isEmpty() || !stack.peek().equals(tag)) return false;
stack.pop();
i = j + 1;
if (stack.isEmpty() && i != n) return false;
} else if (code.startsWith("<", i)) {
int j = code.indexOf(">", i);
if (j == -1) return false;
String tag = code.substring(i+1, j);
if (tag.length() < 1 || tag.length() > 9) return false;
for (char c : tag.toCharArray()) {
if (c < 'A' || c > 'Z') return false;
}
stack.push(tag);
i = j + 1;
} else {
if (stack.isEmpty()) return false;
i++;
}
}
return stack.isEmpty();
}
}
var isValid = function(code) {
let stack = [];
let i = 0, n = code.length;
while (i < n) {
if (code.startsWith("<![CDATA[", i)) {
let j = code.indexOf("]]>", i);
if (j === -1) return false;
i = j + 3;
} else if (code.startsWith("</", i)) {
let j = code.indexOf(">", i);
if (j === -1) return false;
let tag = code.substring(i+2, j);
if (stack.length === 0 || stack[stack.length - 1] !== tag) return false;
stack.pop();
i = j + 1;
if (stack.length === 0 && i !== n) return false;
} else if (code.startsWith("<", i)) {
let j = code.indexOf(">", i);
if (j === -1) return false;
let tag = code.substring(i+1, j);
if (tag.length < 1 || tag.length > 9) return false;
for (let c of tag) {
if (c < 'A' || c > 'Z') return false;
}
stack.push(tag);
i = j + 1;
} else {
if (stack.length === 0) return false;
i++;
}
}
return stack.length === 0;
};
The "Tag Validator" problem asks you to implement a function that checks whether a given string code
is a valid code snippet according to a simplified set of XML-like rules:
<TAG>...</TAG>
.<![CDATA[
and end with ]]>
. CDATA content is treated as plain text and ignored by the tag validator.false
.
The function should return true
if code
is valid, and false
otherwise.
At first glance, this problem looks like parsing XML or HTML, but with stricter rules. The naive approach would be to try to match tags using regular expressions or string matching, but this quickly becomes error-prone due to nesting and CDATA sections.
The key insight is that tags are hierarchical and must be matched in order, which is a classic use case for a stack. Each time we see an opening tag, we push it onto the stack. When we see a closing tag, we pop the top of the stack and check if it matches.
CDATA sections complicate things, as they can contain any characters (including angle brackets), but should be ignored by the validator. So, whenever we encounter a CDATA section, we skip over it entirely.
Brute-force approaches (like replacing tags or using regex) fail for deep nesting or edge cases. Instead, a character-by-character scan with a stack ensures we process everything in the correct order and efficiently.
Here is a step-by-step breakdown of the algorithm:
i
:
i
is <![CDATA[
, find the next ]]>
and skip over the CDATA section.</
), find the next >
and extract the tag name. Check that it matches the top of the stack and pop it.<
), extract the tag name and push it onto the stack. Ensure the tag name is 1-9 uppercase letters.The stack is chosen because it provides O(1) access to the most recent open tag, and is ideal for matching nested structures.
Let's walk through an example:
Input:
<A><B><![CDATA[CDATA_CONTENT]]></B></A>
i=0
, see <A>
→ push A
onto the stack.i=3
, see <B>
→ push B
onto the stack.i=6
, see <![CDATA[
→ skip to ]]>
(i = 25).i=25
, see </B>
→ top of stack is B
, pop it.i=29
, see </A>
→ top of stack is A
, pop it.i=33
, end of string. Stack is empty, so return true
.
If any tag names did not match, or there was extra content after the root tag closed, the function would return false
.
Brute-force approach: Using regular expressions or repeated string replacements could lead to O(N^2) time in the worst case, especially with deeply nested tags or large CDATA sections.
Optimized stack-based approach:
code
. Each character is processed at most once, and all stack operations are O(1).The use of a stack ensures we only store the open tags, and CDATA sections are skipped efficiently.
The "Tag Validator" problem is a test of string parsing and stack usage. By treating the tags as a nested structure and using a stack to match opening and closing tags, we can efficiently validate the code. CDATA sections are handled by skipping, and careful attention to tag name rules ensures correctness. The solution is elegant because it mimics the way XML parsers work, but with much simpler logic tailored to the problem's constraints.