lml/lml.cpp

387 lines
6.6 KiB
C++

#include "lml.hpp"

#include <utility>

using namespace Lml;
// A single lexical unit handed from the tokenizer to the parser.
struct Token final
{
    // Lexical category of a token.
    enum class Kind
    {
        Unexpected, // input that could not be turned into any other kind
        Newline,    // a '\n' character
        Identifier, // bare or double-quoted text
        Colon,      // ':'
        Comma,      // ','
        BraceLeft,  // '{'
        BraceRight, // '}'
    };

    // Category of this token; a default-constructed token is Unexpected.
    Kind kind = Kind::Unexpected;

    // Text associated with the token; empty by default.
    std::string_view view = "";
};
// Reports whether `c` may be part of an unquoted identifier.
//
// NOTE: despite the name, the result is `false` for the reserved characters
// listed below (punctuation, quotes, blanks and newline) and `true` for every
// other character.
static bool isLmlSymbol(char c)
{
    // Explicit element list (not a string literal) so that '\0' is not
    // accidentally treated as reserved.
    static constexpr char reserved[]
    {
        ':', '[', ']', '{', '}', ' ', ';', ',', '`', '\'', '\t', '\n', '"',
    };

    for (char const candidate : reserved)
    {
        if (c == candidate)
        {
            return false;
        }
    }
    return true;
}
struct Tokenizer final
{
std::string_view data {""};
std::optional<Token> Next()
{
while (!this->data.empty())
{
switch (this->data[0])
{
case ' ':
case '\t':
{
this->data = this->data.substr(1);
}
break;
case '\n':
{
this->data = this->data.substr(1);
return Token
{
.kind {Token::Kind::Newline},
.view {"\n"},
};
}
break;
case '{':
{
this->data = this->data.substr(1);
return Token
{
.kind {Token::Kind::BraceLeft},
.view {"{"},
};
}
break;
case '}':
{
this->data = this->data.substr(1);
return Token
{
.kind {Token::Kind::BraceRight},
.view {"}"},
};
}
break;
case ':':
{
this->data = this->data.substr(1);
return Token
{
.kind {Token::Kind::Colon},
.view {":"},
};
}
break;
case ',':
{
this->data = this->data.substr(1);
return Token
{
.kind {Token::Kind::Comma},
.view {","},
};
}
break;
case '"':
{
size_t const identifierEndIndex {this->data.find_first_of('"', 1)};
if (identifierEndIndex == std::string_view::npos)
{
this->data = this->data.substr(this->data.length() - 1, 0);
return Token
{
.kind {Token::Kind::Unexpected},
.view {""},
};
}
std::string_view const identifier = this->data.substr(1, identifierEndIndex - 1);
this->data = this->data.substr(identifierEndIndex + 1);
return Token
{
.kind {Token::Kind::Identifier},
.view {identifier},
};
}
break;
default:
{
size_t identifierLength {0};
{
size_t dataLength {this->data.length()};
while (identifierLength < this->data.length() && isLmlSymbol(this->data[identifierLength]))
{
identifierLength += 1;
}
}
if (identifierLength == 0)
{
return Token
{
.kind {Token::Kind::Unexpected},
.view {""},
};
}
std::string_view const identifier {this->data.substr(0, identifierLength)};
this->data = this->data.substr(identifierLength);
return Token
{
.kind {Token::Kind::Identifier},
.view {identifier},
};
};
}
}
return std::nullopt;
}
};
// Reads a comma-separated list of identifiers up to the end of the line (or
// the end of the input) and returns them in order.
//
// Returns `std::nullopt` when a token other than an identifier is found where
// a value is expected, or when a value is followed by anything other than a
// comma, a newline or the end of the input. A newline directly after a comma
// ends the list without error.
static std::optional<Scalar> ParseScalar(Tokenizer & tokenizer)
{
    Scalar values {};
    for (;;)
    {
        // Value position.
        std::optional<Token> const element {tokenizer.Next()};
        if (!element.has_value() || element->kind == Token::Kind::Newline)
        {
            return values;
        }
        if (element->kind != Token::Kind::Identifier)
        {
            return std::nullopt;
        }
        values.push_back(std::string{element->view});

        // Separator position.
        std::optional<Token> const delimiter {tokenizer.Next()};
        if (!delimiter.has_value() || delimiter->kind == Token::Kind::Newline)
        {
            return values;
        }
        if (delimiter->kind != Token::Kind::Comma)
        {
            return std::nullopt;
        }
    }
}
// Parses a sequence of entries from `tokenizer` into `document`.
//
// Each entry is either a property (`name: value, value, ...`) or an element
// (`name { ... }`, where newlines may separate the name from the opening
// brace and the body starts on the line after the brace, or `name {}` for an
// empty element).
//
// `isRoot` selects how the sequence must end: the root document ends at the
// end of the input, a nested document ends at its closing '}' (which is
// consumed). Returns `true` when the sequence was well-formed and ended the
// expected way. On failure `document` may hold the entries parsed so far.
static bool ParseDocument(Tokenizer & tokenizer, Document & document, bool isRoot)
{
    while (true)
    {
        std::optional<Token> const nameToken {tokenizer.Next()};
        if (!nameToken.has_value())
        {
            // End of input is only a valid terminator for the root document.
            return isRoot;
        }
        switch (nameToken->kind)
        {
            case Token::Kind::Newline: continue;
            case Token::Kind::Identifier: break;
            // A closing brace is only a valid terminator for a nested document.
            case Token::Kind::BraceRight: return !isRoot;
            default: return false;
        }
        std::string name {nameToken->view};

        std::optional<Token> kindToken {tokenizer.Next()};
        if (!kindToken.has_value())
        {
            return false;
        }

        if (kindToken->kind == Token::Kind::Colon)
        {
            std::optional<Scalar> scalar {ParseScalar(tokenizer)};
            if (!scalar.has_value())
            {
                return false;
            }
            document.properties[std::move(name)] = std::move(*scalar);
            continue;
        }

        // Element: only newlines may sit between the name and its '{'. The
        // first token after the name used to be skipped without any check,
        // which accepted input such as `name other {`.
        while (kindToken->kind == Token::Kind::Newline)
        {
            kindToken = tokenizer.Next();
            if (!kindToken.has_value())
            {
                return false;
            }
        }
        if (kindToken->kind != Token::Kind::BraceLeft)
        {
            return false;
        }

        std::optional<Token> const bodyStartToken {tokenizer.Next()};
        if (!bodyStartToken.has_value())
        {
            return false;
        }
        switch (bodyStartToken->kind)
        {
            case Token::Kind::Newline:
            {
                // The nested call consumes everything up to and including the
                // matching '}'.
                Document elementDocument {};
                if (!ParseDocument(tokenizer, elementDocument, false))
                {
                    return false;
                }
                document.elements.emplace_back(std::move(name), std::move(elementDocument));
            }
            break;

            case Token::Kind::BraceRight:
            {
                document.elements.emplace_back(std::move(name), Document{});

                std::optional<Token> const trailingToken {tokenizer.Next()};
                if (!trailingToken.has_value())
                {
                    // Same rule as above: input may only end at root level.
                    return isRoot;
                }
                if (trailingToken->kind != Token::Kind::Newline)
                {
                    return false;
                }
                // Keep parsing the following entries. Returning here used to
                // drop the rest of a root document and made the enclosing
                // document of a nested empty element fail on its own '}'.
            }
            break;

            default: return false;
        }
    }
}
bool Lml::Document::Parse(std::string_view const & data)
{
Tokenizer tokenizer {data};
return ParseDocument(tokenizer, *this, true);
}
std::optional<std::string> Lml::Document::Property(std::string_view const & property)
{
if (PropertyMap::iterator found {this->properties.find(property)}; found != this->properties.end())
{
if (!found->second.empty())
{
return found->second[0];
}
}
return std::nullopt;
}
void Lml::Document::SerializeTo(std::string & string, size_t indentation) const
{
for (auto const & [property, scalar] : this->properties)
{
string.append(indentation, '\t');
string.append(property);
string.append(": ");
if (!scalar.empty())
{
string.append(scalar[0]);
for (std::string const & scalarString : std::span<std::string const>{scalar.begin() + 1, scalar.end()})
{
string.append(", ");
string.append(scalarString);
}
}
string.push_back('\n');
}
for (auto const & [element, document] : this->elements)
{
string.append(indentation, '\t');
string.append(element);
string.append(" {\n");
document.SerializeTo(string, indentation + 1);
string.append(indentation, '\t');
string.append("}\n");
}
string.push_back('\n');
}