Files
workspace/cpp/JsonParse/include/JsonParse.cpp
2026-01-10 21:34:53 +08:00

389 lines
11 KiB
C++
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#include "JsonParse.h"
#include "JsonError.h"
#include "JsonValue.h"
#include <cerrno>
#include <cmath>
#include <cstddef>
#include <cstdlib>
class JsonValue;
//std::string src;
//size_t pos;
//JsonParser::JsonParser(std::string src){
// Returns true when c is one of the ASCII decimal digits '0'..'9'.
bool isnum(char c){
    return '0' <= c && c <= '9';
}
// Parses the whole input as a single JSON value.
//
// Leading and trailing whitespace is skipped. Anything left after the value
// is reported through error() as JsonErrorCode::UnexpectedToken.
// Returns the parsed value.
JsonValue JsonParser::parse(){
    skip_white_space();
    JsonValue result = parse_value();
    skip_white_space();
    // Compare the cursor with the real input length instead of relying on the
    // '\0' sentinel returned by peek(): an embedded NUL byte in src would
    // otherwise look like the end of input and hide any trailing garbage.
    if(pos < src.size()){
        error(JsonErrorCode::UnexpectedToken,
              "Can't find the end of input");
    }
    return result;
}
// Reports a parse failure: builds a JsonError from the error code, the
// message and the parser's current pos / line / col, then throws it.
// This function never returns normally.
void JsonParser::error(JsonErrorCode e, const std::string& msg){
    JsonError failure(e, msg, pos, line, col);
    throw failure;
}
// Advances the cursor past any run of space, newline, tab or carriage-return
// characters. Stops at the first other character or at the end of src.
void JsonParser::skip_white_space(){
    for(;;){
        if(pos >= src.size()) return;
        switch(src[pos]){
            case ' ':
            case '\n':
            case '\t':
            case '\r':
                consume(); // goes through consume() so line/col stay in sync
                break;
            default:
                return;
        }
    }
}
// Returns the character at the cursor without consuming it, or '\0' when the
// cursor is at or past the end of src.
char JsonParser::peek() {
    const bool at_end = pos >= src.size();
    return at_end ? '\0' : src[pos];
}
// Consumes and returns the character at the cursor, updating line/col.
// A '\n' increments line and resets col to 1; any other character increments
// col. At or past the end of src nothing is consumed and '\0' is returned.
char JsonParser::consume() {
    if(pos < src.size()){
        const char taken = src[pos];
        ++pos;
        if(taken != '\n'){
            ++col;
        } else {
            ++line;
            col = 1;
        }
        return taken;
    }
    return '\0';
}
// Encodes a Unicode code point as a UTF-8 byte sequence (1 to 4 bytes).
//
// Values that are not Unicode scalar values -- the UTF-16 surrogate range
// 0xD800..0xDFFF and anything above 0x10FFFF -- cannot be represented in
// well-formed UTF-8. They are replaced with U+FFFD (REPLACEMENT CHARACTER)
// so the result is never ill-formed or silently empty.
static std::string encode_utf8(unsigned int code_point) {
    const bool is_surrogate = code_point >= 0xD800 && code_point <= 0xDFFF;
    if (is_surrogate || code_point > 0x10FFFF) {
        code_point = 0xFFFD;
    }

    std::string out;
    if (code_point <= 0x7F) {
        // 0xxxxxxx (ASCII)
        out += static_cast<char>(code_point);
    } else if (code_point <= 0x7FF) {
        // 110xxxxx 10xxxxxx
        out += static_cast<char>(0xC0 | ((code_point >> 6) & 0x1F));
        out += static_cast<char>(0x80 | (code_point & 0x3F));
    } else if (code_point <= 0xFFFF) {
        // 1110xxxx 10xxxxxx 10xxxxxx
        out += static_cast<char>(0xE0 | ((code_point >> 12) & 0x0F));
        out += static_cast<char>(0x80 | ((code_point >> 6) & 0x3F));
        out += static_cast<char>(0x80 | (code_point & 0x3F));
    } else {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx (supplementary planes, e.g. emoji)
        out += static_cast<char>(0xF0 | ((code_point >> 18) & 0x07));
        out += static_cast<char>(0x80 | ((code_point >> 12) & 0x3F));
        out += static_cast<char>(0x80 | ((code_point >> 6) & 0x3F));
        out += static_cast<char>(0x80 | (code_point & 0x3F));
    }
    return out;
}
// 辅助函数解析4位十六进制
unsigned int JsonParser::parse_hex_4() {
unsigned int code = 0;
for (int i = 0; i < 4; i++) {
char c = consume();
code <<= 4;
if (c >= '0' && c <= '9') code |= (c - '0');
else if (c >= 'a' && c <= 'f') code |= (c - 'a' + 10);
else if (c >= 'A' && c <= 'F') code |= (c - 'A' + 10);
else error(JsonErrorCode::InvalidEscape, "Invalid hex digit");
}
return code;
}
// Parses one JSON value of any kind, dispatching on its first
// non-whitespace character:
//   '"' -> string, '[' -> list, '{' -> map, 't'/'f' -> bool,
//   'n' -> null, '-' or a digit -> number.
// A '\0' from peek() is reported as UnexpectedEnd; any other character is
// reported as UnexpectedToken.
JsonValue JsonParser::parse_value() {
    skip_white_space();
    const char head = peek();
    switch (head) {
        case '\0':
            // peek() yields '\0' once the input is exhausted.
            error(JsonErrorCode::UnexpectedEnd, "Unexpected end of input");
            break;
        case '\"':
            return parse_string();
        case '[':
            return parse_list();
        case '{':
            return parse_map();
        case 't':
        case 'f':
            return parse_bool();
        case 'n':
            return parse_null();
        case '-':
            return parse_number();
        default:
            if (isnum(head)) return parse_number();
            break;
    }
    // Nothing matched: format the offending character into the message.
    std::string msg = "Unexpected Character: '";
    msg += head;
    msg += "'";
    error(JsonErrorCode::UnexpectedToken, msg);
    return JsonValue(); // not reached in practice: error() above throws
}
// Consumes the literal `null` one character at a time and returns
// JsonValue(nullptr). The first mismatching character is reported through
// error() as JsonErrorCode::InvalidValue, with that character in the message.
JsonValue JsonParser::parse_null(){
    for(const char* expected = "null"; *expected != '\0'; ++expected){
        if(peek() != *expected){
            std::string msg = "Expected 'null', but found unexpected character: '";
            msg += peek();
            msg += "'";
            error(JsonErrorCode::InvalidValue, msg);
            continue;
        }
        consume();
    }
    return JsonValue(nullptr);
}
// Parses the literal `true` or `false`, chosen by the character at the
// cursor, and returns the matching boolean JsonValue. A mismatch inside the
// literal, or a first character that is neither 't' nor 'f', is reported
// through error() as JsonErrorCode::InvalidValue.
JsonValue JsonParser::parse_bool() {
    /*
     Design note: dispatching on the first character works for literals that
     share no common prefix; it does not work for literals that do.
     Ways to handle a shared prefix:
     1. increase the look-ahead depth
     2. greedy match with backtracking: if A fails, roll back and try B
     3. factor out the common prefix and branch on the character after it
     * JSON keywords do not overlap
    */
    const char first = peek();
    if (first != 't' && first != 'f') {
        error(JsonErrorCode::InvalidValue, "Expected 'true' or 'false'");
        return JsonValue();
    }

    const bool value = (first == 't');
    // string_view is just {const char* ptr, size_t len}; it does not copy the text.
    const std::string_view literal = value ? "true" : "false";
    const char* const mismatch = value ? "Invalid boolean: expected 'true'"
                                       : "Invalid boolean: expected 'false'";
    for (const char expected : literal) {
        if (peek() != expected) {
            error(JsonErrorCode::InvalidValue, mismatch);
            continue;
        }
        consume();
    }
    return JsonValue(value);
}
std::string JsonParser::parse_string_raw(){
consume(); // 消费开始的 "
std::string content;
while (peek() != '\"') {
char c = consume(); // 取得当前字符
if (c == '\\') {
// 遇到转义,看下一个字符
char next = consume();
switch (next) {
case 'n': content += '\n'; break;
case 't': content += '\t'; break;
case 'r': content += '\r'; break;
case '\\': content += '\\'; break;
case '\"': content += '\"'; break;
case 'u': {
// TODO: Unicode 处理
// 在 parse_string_raw 的 switch (next) 中:
unsigned int code = parse_hex_4();
// 检查是否是 UTF-16 高位代理 (Surrogate Pair)
if (code >= 0xD800 && code <= 0xDBFF) {
if (consume() != '\\' || consume() != 'u') {
error(JsonErrorCode::InvalidEscape, "Expected low surrogate pair \\uXXXX");
}
unsigned int low = parse_hex_4();
if (low < 0xDC00 || low > 0xDFFF) {
error(JsonErrorCode::InvalidEscape, "Invalid low surrogate");
}
// 计算真实的码点: L = (H - 0xD800) * 0x400 + (L - 0xDC00) + 0x10000
code = 0x10000 + (code - 0xD800) * 0x400 + (low - 0xDC00);
}
content += encode_utf8(code);
break;
}
break;
case '\0':
error(JsonErrorCode::UnexpectedEnd, "Unexpected EOF in escape sequence");
break;
default:
error(JsonErrorCode::InvalidEscape, "Invalid escape character");
break;
}
}
else if (c == '\0') {
// 还没遇到引号就结束了
error(JsonErrorCode::UnexpectedEnd, "String not closed before end of file");
}
else {
// 普通字符
content += c;
}
}
consume(); // 消费结尾的 "
return content;
}
// Parses a JSON string literal at the cursor and wraps the decoded text in a
// JsonValue. All of the parsing is done by parse_string_raw().
JsonValue JsonParser::parse_string() {
    std::string text = parse_string_raw();
    return JsonValue(std::move(text));
}
// Parses a JSON array starting at its '[' and returns it as a JsonValue
// built from a std::vector<JsonValue>.
//
// Elements are parsed with parse_value() and must be separated by ','.
// After an element, anything other than ',' or ']' is reported through
// error() as JsonErrorCode::UnexpectedToken.
JsonValue JsonParser::parse_list(){
    consume(); // opening '['
    skip_white_space();
    std::vector<JsonValue> elements;

    // An immediately closed "[]" yields an empty list.
    bool closed = (peek() == ']');
    if(closed){
        consume();
    }

    while(!closed){
        skip_white_space();
        elements.push_back(parse_value());
        skip_white_space();
        const char separator = peek();
        switch(separator){
            case ',':
                consume(); // another element follows
                break;
            case ']':
                consume();
                closed = true; // normal end of the list
                break;
            default: {
                std::string msg = "Expected ',' or ']' but found '";
                msg += separator;
                msg += "'";
                error(JsonErrorCode::UnexpectedToken, msg);
                break;
            }
        }
    }
    return JsonValue(std::move(elements));
}
// Parses a JSON object starting at its '{' and returns it as a JsonValue
// built from a std::map<std::string, JsonValue>.
//
// Each member is `"key" : value`, and members are separated by ','.
// Errors (all reported through error()):
//   - UnexpectedToken: key does not start with '"', missing ':' after a key,
//     or a member is followed by something other than ',' or '}'
//   - DuplicateKey: the same key appears more than once in this object
JsonValue JsonParser::parse_map(){
    consume(); // opening '{'
    skip_white_space();
    std::map<std::string, JsonValue> mp;
    // An immediately closed "{}" yields an empty map.
    if(peek() == '}') {
        consume();
        return JsonValue(std::move(mp));
    }
    while(true){
        skip_white_space();
        char c = peek();
        if(c != '\"'){
            // A key must start with a double quote.
            std::string msg = "Expect \'\"\', but find \'";
            msg += c;
            msg += "\'";
            error(JsonErrorCode::UnexpectedToken, msg);
        }
        std::string key = parse_string_raw();
        // The ':' separator.
        skip_white_space();
        if(peek() != ':'){
            std::string msg = "Expected \':\' after object key";
            error(JsonErrorCode::UnexpectedToken, msg);
        }
        consume(); // ':'
        skip_white_space();
        // The member value.
        JsonValue val = parse_value();
        // Insert and detect a duplicate key with one lookup. try_emplace leaves
        // both arguments untouched when the key already exists, so the map is
        // unchanged in the duplicate case.
        auto [slot, inserted] = mp.try_emplace(std::move(key), std::move(val));
        if(!inserted){
            error(JsonErrorCode::DuplicateKey, "Duplicate key found: " + slot->first);
        }
        skip_white_space();
        // After a member: either the object closes or another member follows.
        char cc = peek();
        if(cc == '}'){
            consume();
            break;
        } else if(cc == ','){
            consume();
            // more members remain; continue with the next iteration
        } else {
            std::string msg = "Expected \',\' or \'}\' in object";
            error(JsonErrorCode::UnexpectedToken, msg);
        }
    }
    return JsonValue(std::move(mp));
}
// Parses a JSON number at the cursor and returns it as a JsonValue holding a
// double.
//
// The token is scanned first according to the JSON grammar:
//   [ '-' ] ( '0' | digit+ ) [ '.' digit+ ] [ ('e'|'E') ['+'|'-'] digit+ ]
// and then converted with std::strtod. Grammar violations, a conversion that
// does not consume the whole scanned token, and magnitudes too large for a
// double are reported through error() as JsonErrorCode::InvalidNumber.
JsonValue JsonParser::parse_number(){
    size_t start = pos;
    // Sign.
    if(peek() == '-'){
        consume();
        if(!isnum(peek())){
            error(JsonErrorCode::InvalidNumber, "Expected digit after \'-\'");
        }
    }
    // Integer part.
    if(peek() == '0'){
        // Leading zero: it must not be followed directly by another digit.
        consume();
        if(isnum(peek())){
            error(JsonErrorCode::InvalidNumber, "Leading 0 is not allowed");
        }
    } else if(isnum(peek())){
        while(isnum(peek())) consume();
    } else {
        // Not a digit.
        error(JsonErrorCode::InvalidNumber, "Expected digit in a integer part");
    }
    // Fraction.
    if(peek() == '.'){
        consume();
        if(!isnum(peek())){
            error(JsonErrorCode::InvalidNumber, "Expected digit after \'.\'");
        }
        while(isnum(peek())){
            consume();
        }
    }
    // Exponent e/E.
    if(peek() == 'e' || peek() == 'E'){
        consume();
        if(peek() == '+' || peek() == '-'){
            consume();
        }
        if(!isnum(peek())){
            error(JsonErrorCode::InvalidNumber, "Expected digit in exp");
        }
        while(isnum(peek())){
            consume();
        }
    }
    // Convert the scanned token. std::strtod is used directly so that:
    //  - a partial conversion is detected through the end pointer (strtod is
    //    locale-sensitive and would otherwise stop silently at the '.'), and
    //  - only overflow is rejected; a range error that is not an overflow
    //    (the result is not +/-HUGE_VAL) keeps strtod's returned value.
    const std::string num_str = src.substr(start, pos - start);
    errno = 0;
    char* parsed_end = nullptr;
    double val = std::strtod(num_str.c_str(), &parsed_end);
    const bool consumed_all = (parsed_end == num_str.c_str() + num_str.size());
    const bool overflowed = (errno == ERANGE) && (val == HUGE_VAL || val == -HUGE_VAL);
    if(!consumed_all || overflowed){
        error(JsonErrorCode::InvalidNumber, "Failed to parse number");
    }
    return JsonValue(val);
}