2020-07-23 正则URL

2020-07-23  本文已影响0人  null_2562

前提:翻 RFC 2396《Uniform Resource Identifiers (URI): Generic Syntax》的时候,发现附录 B 里已经给出了写好的 URI 拆分正则。
地址:https://tools.ietf.org/html/rfc2396#appendix-B
正则表达式:

^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?

光看记不住,拿C练了下手, C代码:

//
//  main.c
//  uri_manager
//
//  Created by null on 2020/7/21.
//  Copyright © 2020 null. All rights reserved.
//

#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <regex.h>


/*
 * The five components a URI is split into. Each component is stored in a
 * fixed-size character buffer; values longer than the buffer do not fit and
 * must be truncated by whoever fills the structure.
 */
struct uri_part_s {
    char scheme[8];        /* e.g. "http" */
    char authority[256];   /* the part after "//" */
    char path[256];        /* the hierarchical path */
    char query[256];       /* the part after '?' */
    char fragment[256];    /* the part after '#' */
};

/* Allow the structure to be named without the `struct` keyword. */
typedef struct uri_part_s uri_part_s;

/*
 * Capture-group numbers of the RFC 2396 Appendix B expression
 *   ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
 * Groups 1, 3, 6 and 8 also contain the delimiter (":", "//", "?", "#");
 * the indices below select the inner groups that hold the bare component.
 */
#define SCHEME_INDEX              2   /* ([^:/?#]+)  */
#define AUTHORITY_INDEX           4   /* ([^/?#]*)   */
#define PATH_INDEX                5   /* ([^?#]*)    */
#define QUERY_INDEX               7   /* ([^#]*)     */
#define FRAGMENT_INDEX            9   /* (.*)        */

static uri_part_s uri_part(const char *uri, const char *re)
{
    struct uri_part_s struct_uri_parts;
    //正则初始化
    int status = 0, flag = REG_EXTENDED, max_re = 10;
    regmatch_t pmatch[max_re];
    regex_t reg;
    
    regcomp(&reg, re, flag);
    status = regexec(&reg, uri, max_re, pmatch, 0);
    //匹配成功
    for (int i = 0; i < max_re; i ++) {
        switch (i) {
            case SCHEME_INDEX:
            {
                char *pt = strndup(uri + pmatch[i].rm_so, pmatch[i].rm_eo - pmatch[i].rm_so);
                strlcpy(struct_uri_parts.scheme, pt, sizeof(struct_uri_parts.scheme));
            }
                break;
            case AUTHORITY_INDEX:
            {
                char *pt = strndup(uri + pmatch[i].rm_so, pmatch[i].rm_eo - pmatch[i].rm_so);
                strlcpy(struct_uri_parts.authority, pt, sizeof(struct_uri_parts.authority));
            }
                break;
            case PATH_INDEX:
            {
                char *pt = strndup(uri + pmatch[i].rm_so, pmatch[i].rm_eo - pmatch[i].rm_so);
                strlcpy(struct_uri_parts.path, pt, sizeof(struct_uri_parts.path));
            }
                break;
            case QUERY_INDEX:
            {
                char *pt = strndup(uri + pmatch[i].rm_so, pmatch[i].rm_eo - pmatch[i].rm_so);
                strlcpy(struct_uri_parts.query, pt, sizeof(struct_uri_parts.query));
            }
                break;
            case FRAGMENT_INDEX:
            {
                char *pt = strndup(uri + pmatch[i].rm_so, pmatch[i].rm_eo - pmatch[i].rm_so);
                strlcpy(struct_uri_parts.fragment, pt, sizeof(struct_uri_parts.fragment));
            }
                break;
                
            default:
                
                break;
        }
    }
    regfree(&reg);
    return struct_uri_parts;
}


/// Demo: split a sample URI with the regular expression from RFC 2396,
/// Appendix B (https://tools.ietf.org/html/rfc2396#appendix-B) and print the
/// scheme, authority, path, query and fragment, one per line.
int main(int argc, const char * argv[]) {
    (void)argc;
    (void)argv;

    // A URI may not contain an unescaped space, so the host is written
    // without one.
    const char *origin_uri = "http://www.ics.uci.edu/pub/ietf/uri/abc?abc=cba#Related";
    // The query group starts with a literal '?', which must be escaped in the
    // regex ("\\?" in C source). Writing "/?" instead makes the delimiter an
    // optional slash and leaves the '?' inside the captured query.
    const char *parts_re = "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?";

    struct uri_part_s parts = uri_part(origin_uri, parts_re);
    printf("%s\n", parts.scheme);
    printf("%s\n", parts.authority);
    printf("%s\n", parts.path);
    printf("%s\n", parts.query);
    printf("%s\n", parts.fragment);
    return 0;
}

特此记录一下
完成

上一篇下一篇

猜你喜欢

热点阅读