一趟解析处理http

winston · 发表于 2011-12-3 11:50:52

来自：http://topic.csdn.net/u/20111128/14/d701ae5b-b6a3-45ff-a199-d068d77ecb5d.html
问题：
[{"iconNo":"1","seq":"1","devStatus":"1","devdescribe":"asd"}]

[{"iconNo":"2","seq":"2","devStatus":"2","devdescribe":"asasdsasdas师大d"}]

解析出各个字段，得到key-value。

我看Lighttpd解析http那一块代码大约1000行，不过它里边包括了http协议的处理，这里特意为了模仿它，写了一个基本类似的函数。

说说我为什么要写这么长吧：因为我假设用户的输入是任意风格的，也就是说很多地方会敲进一些空格或者TAB，格式也比较乱。这里我还没有处理折叠格式（写的时候忘了），实现起来无非就是对\n做一下特殊处理。

http里的"\r\n" 对应这里的','。
另外，由于这里的key、value都被""包裹，我假定""内不允许存在空白字符；对于http，则需要特殊判断key中间是否有空白字符。

这个办法亮点在哪里呢？就是一趟扫描，不需要回溯再去检查合法性。那么复杂的情况为何可以做到呢？就是用了state这个变量，将解析过程划分成了不同的阶段：0阶段是解析一行时第一次进入的状态，以后都不会再进入了；1阶段是解析key；2阶段是解析':'并且过渡到3阶段；3阶段是解析val；4阶段是解析','或者行结尾。通过state，保证在每一个case里我们可以把精力完全集中在一部分上，简化了逻辑复杂性，希望大家有所体会。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Built-in sample lines that main() feeds to parse_key_value().
 * The double quotes that belong to the data are escaped so that each entry is
 * one valid C string literal. */
static const char* parse_test[] =
{
"[{\"iconNo\":\"1\",\"seq\":\"1\",\"devStatus\":\"1\",\"devdescribe\":\"asd\"}]",
"[{\"iconNo\":\"2\",\"seq\":\"2\",\"devStatus\":\"2\",\"devdescribe\":\"asasdsasdas师大d\"}]"
};
/* States of the single-pass parser in parse_key_value(). */
enum
{
    PKV_STATE_START = 0, /* start of line: expects "[{" and then the key's '"' */
    PKV_STATE_KEY   = 1, /* inside a quoted key */
    PKV_STATE_COLON = 2, /* after a key: expects ':' and then the value's '"' */
    PKV_STATE_VALUE = 3, /* inside a quoted value */
    PKV_STATE_NEXT  = 4  /* after a value: expects ',' or the closing "}]" */
};

/* Print one field as "key:value\n" straight out of `line`.
 * The *_start / *_end arguments are inclusive byte offsets into `line`.
 * Printing with a precision avoids copying into fixed-size buffers, so keys
 * and values of any length are handled safely. */
static void print_key_value(const char *line, int key_start, int key_end,
                            int val_start, int val_end)
{
    printf("%.*s:%.*s\n",
           key_end - key_start + 1, line + key_start,
           val_end - val_start + 1, line + val_start);
}

/*
 * Parse one line of the form [{"key":"value","key":"value",...}] in a single
 * left-to-right pass and print every field to stdout as "key:value\n".
 *
 * Spaces and tabs are accepted before "[{", around ':' and around ','.
 * The following are rejected:
 *   1. a key or value that is not wrapped in '"';
 *   2. an empty key or value, or one containing whitespace or one of the
 *      structural characters listed in the corresponding state below;
 *   3. a malformed  key : value  sequence, or a missing "[{" / "}]".
 *
 * Fields are printed as soon as they are complete, so fields preceding a
 * syntax error have already been printed when the error is detected.
 * Anything following the closing "}]" is ignored.
 *
 * line: NUL-terminated input string; NULL is treated as a parse error.
 * Returns 0 on success, -1 on any syntax error.
 */
int parse_key_value(const char *line)
{
    int key_start = -1, key_end = -1, val_start = -1, val_end = -1;
    int line_len;
    int state = PKV_STATE_START;
    int seen_open = 0; /* non-zero once the leading "[{" has been consumed */

    if (line == NULL)
    {
        return -1;
    }
    line_len = (int)strlen(line);

    /* i <= line_len on purpose: the terminating '\0' is fed to the state
     * machine like any other character so every state can reject a premature
     * end of line. */
    for (int i = 0; i <= line_len; )
    {
        switch (state)
        {
        case PKV_STATE_START:
            switch (line[i])
            {
            case ' ':
            case '\t':
                ++i;
                break;
            case '[':
                if (seen_open != 0)
                {
                    return -1; /* a second '[' */
                }
                if (line[i + 1] != '{')
                {
                    return -1; /* '[' must be followed directly by '{' */
                }
                seen_open = 1;
                i += 2;
                break;
            case '"':
                if (seen_open == 0)
                {
                    return -1; /* key before the "[{" sequence */
                }
                key_start = ++i; /* first byte after the opening quote */
                state = PKV_STATE_KEY;
                break;
            default:
                /* a lone '{', the end of the line, or any other character */
                return -1;
            }
            break;

        case PKV_STATE_KEY:
            switch (line[i])
            {
            case ' ':
            case '\t':
            case '{':
            case '[':
            case ',':
            case ':':
            case '\0':
                /* characters not allowed inside a key (not exhaustive) */
                return -1;
            case '"':
                key_end = i - 1;
                if (key_end < key_start)
                {
                    return -1; /* empty key */
                }
                ++i;
                state = PKV_STATE_COLON;
                break;
            default:
                ++i; /* ordinary key character */
                break;
            }
            break;

        case PKV_STATE_COLON:
            switch (line[i])
            {
            case ' ':
            case '\t':
                ++i;
                break;
            case ':':
                ++i;
                state = PKV_STATE_VALUE;
                /* skip blanks after ':' and require the value's opening quote */
                while (line[i] != '\0')
                {
                    if (line[i] != ' ' && line[i] != '\t')
                    {
                        if (line[i] != '"')
                        {
                            return -1;
                        }
                        val_start = ++i;
                        break;
                    }
                    ++i;
                }
                if (val_start == -1)
                {
                    return -1; /* line ended before the value started */
                }
                break;
            default:
                return -1;
            }
            break;

        case PKV_STATE_VALUE:
            switch (line[i])
            {
            case ' ':
            case '\t':
            case ',':
            case '\0':
            case '[':
            case ']':
            case '{':
            case '}':
                /* characters not allowed inside a value */
                return -1;
            case '"':
                val_end = i - 1;
                if (val_end < val_start)
                {
                    return -1; /* empty value */
                }
                ++i;
                state = PKV_STATE_NEXT;
                break;
            default:
                ++i; /* ordinary value character */
                break;
            }
            break;

        case PKV_STATE_NEXT:
            while (line[i] == ' ' || line[i] == '\t')
            {
                ++i;
            }
            if (line[i] == ',')
            {
                /* one field is complete: print it and reset the offsets */
                print_key_value(line, key_start, key_end, val_start, val_end);
                key_start = key_end = val_start = val_end = -1;

                /* skip blanks after ',' and require the next key's quote */
                ++i;
                while (line[i] == ' ' || line[i] == '\t')
                {
                    ++i;
                }
                if (line[i] != '"')
                {
                    return -1;
                }
                key_start = ++i;
                state = PKV_STATE_KEY; /* go straight to parsing the next key */
            }
            else if (line[i] == '}')
            {
                if (line[i + 1] != ']')
                {
                    return -1; /* incomplete "}]" */
                }
                print_key_value(line, key_start, key_end, val_start, val_end);
                return 0; /* done; anything after "}]" is ignored */
            }
            else
            {
                /* end of line without "}]", or an unexpected character */
                return -1;
            }
            break;

        default:
            fprintf(stderr, "unknown state %d \n", state);
            return -1;
        }
    }

    /* Every state rejects '\0', so the loop always returns before running
     * out; this keeps the function well-defined regardless. */
    return -1;
}
/*
 * Driver: parse every line read from standard input, then parse the built-in
 * samples in parse_test. Parse results are printed by parse_key_value();
 * its return value is not inspected here.
 */
int main(void)
{
    char test_buffer[1000];

    /* Read whole lines with a bounded fgets(): the read can never overrun
     * test_buffer, and blanks inside a line reach the parser intact
     * (scanf("%s") would split the input at every space or tab). */
    while (fgets(test_buffer, sizeof test_buffer, stdin) != NULL)
    {
        /* drop the trailing "\n" or "\r\n" kept by fgets() */
        test_buffer[strcspn(test_buffer, "\r\n")] = '\0';
        parse_key_value(test_buffer);
    }

    for (size_t i = 0; i < sizeof parse_test / sizeof parse_test[0]; ++i)
    {
        parse_key_value(parse_test[i]);
    }
    return 0;
}
owenliang@linux-7lsl:~/csdn/src> ./main
iconNo:1
seq:1
devStatus:1
devdescribe:asd
iconNo:2
seq:2
devStatus:2
devdescribe:asasdsasdas师大d
复制代码

		自动登录	找回密码
密码			用户注册