C语言实现HTML转MHT
2021-01-06 本文已影响0人
一路向后
1.crc.h
/* Public interface of the CRC helpers used by the HTML-to-MHT converter. */
#ifndef _HTM_TO_MHT_CRC_H_
#define _HTM_TO_MHT_CRC_H_
/* Folds `len` bytes starting at `ss` into a table-driven 32-bit CRC register and returns it. */
unsigned int crc_buffer(const void *ss, int len);
/* Folds the contents of the file named `fn` into the same kind of 32-bit CRC register. */
unsigned int crc_file(const char *fn);
#endif
2.crc.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "crc.h"
/*
 * 256-entry lookup table for the byte-at-a-time CRC routines below: the
 * index is (register ^ input byte) & 0xff and the entry is XORed with the
 * register shifted right by 8.
 * NOTE(review): entry 128 is 0xedb88320, which matches the reflected CRC-32
 * polynomial, so this looks like the standard CRC-32 table -- confirm before
 * relying on compatibility with other CRC-32 implementations.
 */
const unsigned int crc32_table[256] = {
0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
0x2d02ef8dL
};
/*
 * Fold a memory buffer into a 32-bit CRC register.
 *
 * ss  - start of the data to checksum
 * len - number of bytes to process; values <= 0 process nothing
 *
 * Returns the raw register: it starts at 0xffffffff and is updated one
 * byte at a time through crc32_table. No final inversion is applied, so an
 * empty buffer yields 0xffffffff.
 */
unsigned int crc_buffer(const void *ss, int len)
{
    const unsigned char *cursor = ss;
    unsigned int crc = 0xffffffff;
    int remaining;

    for (remaining = len; remaining > 0; remaining--) {
        unsigned int slot = (crc ^ *cursor) & 0xff;

        crc = crc32_table[slot] ^ (crc >> 8);
        cursor++;
    }
    return crc;
}
/*
 * Fold the entire contents of a file into a 32-bit CRC register.
 *
 * fn - path of the file to read (opened in binary mode)
 *
 * Returns the raw register, using the same update rule as crc_buffer():
 * start at 0xffffffff, one table lookup per byte, no final inversion.
 * If the file cannot be opened a message is written to stderr and the
 * untouched initial value 0xffffffff is returned (the same value an empty
 * file produces).
 */
unsigned int crc_file(const char *fn)
{
    /* A small fixed chunk keeps stack usage bounded regardless of file size. */
    unsigned char chunk[4096];
    unsigned int crc = 0xffffffff;
    size_t got;
    size_t i;
    FILE *fp = fopen(fn, "rb");

    if (fp == NULL)
    {
        fprintf(stderr, "cannot crc a file");
        /* Do not fall through: reading from a NULL stream is undefined. */
        return crc;
    }

    /* Keep reading until fread reports no more data, so every chunk of a
     * large file contributes to the checksum, not just the first one. */
    while ((got = fread(chunk, 1, sizeof chunk, fp)) > 0)
    {
        for (i = 0; i < got; ++i)
        {
            crc = crc32_table[(crc ^ chunk[i]) & 0xff] ^ (crc >> 8);
        }
    }

    fclose(fp);
    return crc;
}
3.base64.h
/* Public interface of the Base64 helpers. */
#ifndef _BASE64_H_
#define _BASE64_H_
/* Encodes str_len bytes of str; returns a malloc'd, NUL-terminated Base64 string the caller must free. */
unsigned char *base64_encode(unsigned char *str, long str_len);
/* Decodes the NUL-terminated Base64 string code; returns a malloc'd, NUL-terminated buffer the caller must free. */
unsigned char *base64_decode(unsigned char *code);
#endif
4.base64.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <malloc.h>
#include "base64.h"
/*
 * Encode a byte buffer as Base64 (standard alphabet, '=' padding).
 *
 * str     - input bytes; may contain NUL bytes
 * str_len - number of input bytes to encode
 *
 * Returns a newly malloc'd, NUL-terminated string that the caller must
 * free, or NULL if str_len is negative, str is NULL with a positive
 * length, or memory cannot be allocated.
 *
 * Only the first str_len bytes of str are ever read: a trailing group of
 * one or two bytes is encoded with zero fill bits and '=' padding instead
 * of peeking at whatever follows the input in memory.
 */
unsigned char *base64_encode(unsigned char *str, long str_len)
{
    /* Base64 alphabet, indexed by 6-bit value. */
    static const char base64_table[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    unsigned char *res;
    size_t n;
    size_t out_len;
    size_t in = 0;
    size_t out = 0;

    if (str_len < 0 || (str == NULL && str_len > 0))
        return NULL;

    n = (size_t)str_len;
    /* Every started group of 3 input bytes yields 4 output characters. */
    out_len = (n + 2) / 3 * 4;
    res = malloc(out_len + 1);
    if (res == NULL)
        return NULL;

    /* Full 3-byte groups. */
    while (n - in >= 3)
    {
        res[out++] = (unsigned char)base64_table[str[in] >> 2];
        res[out++] = (unsigned char)base64_table[(str[in] & 0x3) << 4 | (str[in + 1] >> 4)];
        res[out++] = (unsigned char)base64_table[(str[in + 1] & 0xf) << 2 | (str[in + 2] >> 6)];
        res[out++] = (unsigned char)base64_table[str[in + 2] & 0x3f];
        in += 3;
    }

    /* Trailing partial group: missing bytes contribute zero bits. */
    if (n - in == 1)
    {
        res[out++] = (unsigned char)base64_table[str[in] >> 2];
        res[out++] = (unsigned char)base64_table[(str[in] & 0x3) << 4];
        res[out++] = '=';
        res[out++] = '=';
    }
    else if (n - in == 2)
    {
        res[out++] = (unsigned char)base64_table[str[in] >> 2];
        res[out++] = (unsigned char)base64_table[(str[in] & 0x3) << 4 | (str[in + 1] >> 4)];
        res[out++] = (unsigned char)base64_table[(str[in + 1] & 0xf) << 2];
        res[out++] = '=';
    }

    res[out] = '\0';
    return res;
}
/* Map one Base64 alphabet character to its 6-bit value; any other byte
 * (including '=') maps to 0. */
static unsigned int base64_value(unsigned char c)
{
    static const char alphabet[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    const char *hit = (c != '\0') ? strchr(alphabet, c) : NULL;

    return hit ? (unsigned int)(hit - alphabet) : 0u;
}

/*
 * Decode a NUL-terminated Base64 string (standard alphabet, '=' padding).
 *
 * code - Base64 text; processed in complete groups of 4 characters, any
 *        trailing incomplete group is ignored
 *
 * Returns a newly malloc'd buffer holding the decoded bytes followed by a
 * NUL terminator (caller frees), or NULL if code is NULL or allocation
 * fails. The decoded length is not returned, so binary data containing
 * NUL bytes cannot be measured with strlen() by the caller.
 *
 * Characters outside the alphabet decode as value 0. Writes are bounded by
 * the computed output length, so padded groups never write past the end of
 * the allocation.
 */
unsigned char *base64_decode(unsigned char *code)
{
    size_t len;
    size_t groups;
    size_t pad = 0;
    size_t out_len;
    size_t g;
    size_t o = 0;
    unsigned char *res;

    if (code == NULL)
        return NULL;

    len = strlen((const char *)code);
    groups = len / 4;

    /* Padding can only appear at the end of the last complete group. */
    if (groups > 0 && code[groups * 4 - 1] == '=')
    {
        pad = 1;
        if (code[groups * 4 - 2] == '=')
            pad = 2;
    }

    out_len = groups * 3 - pad;
    res = malloc(out_len + 1);
    if (res == NULL)
        return NULL;

    /* Each group of 4 characters carries 24 bits = up to 3 output bytes. */
    for (g = 0; g < groups; g++)
    {
        const unsigned char *quad = code + g * 4;
        unsigned int v0 = base64_value(quad[0]);
        unsigned int v1 = base64_value(quad[1]);
        unsigned int v2 = base64_value(quad[2]);
        unsigned int v3 = base64_value(quad[3]);
        unsigned char bytes[3];
        int k;

        bytes[0] = (unsigned char)((v0 << 2 | v1 >> 4) & 0xff);
        bytes[1] = (unsigned char)((v1 << 4 | v2 >> 2) & 0xff);
        bytes[2] = (unsigned char)((v2 << 6 | v3) & 0xff);

        /* Drop the bytes that only exist because of '=' padding. */
        for (k = 0; k < 3 && o < out_len; k++)
            res[o++] = bytes[k];
    }

    res[out_len] = '\0';
    return res;
}
5.main.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <malloc.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include "base64.h"
#include "crc.h"
/*
 * CONVERT_STATE_1(p): convert the character at *p with CONVERT(), append the
 * result to `buffer2`, free the temporary, and advance p by one.
 * Expands to several bare statements and relies on locals named `q` and
 * `buffer2` being in scope at the point of use; because it is not wrapped
 * in do { } while (0), it must only be used inside braces.
 */
#define CONVERT_STATE_1(p) q = CONVERT(*(p++)); \
strcat(buffer2, q); \
free(q);
/* CONVERT_STATE_2(p): same as CONVERT_STATE_1 but leaves p where it is. */
#define CONVERT_STATE_2(p) q = CONVERT(*(p)); \
strcat(buffer2, q); \
free(q);
/* Advance p until it points at an alphabetic character. The loop does not
 * stop at a NUL terminator, since isalpha('\0') is false. */
#define EATNONALPHA(p) while(!isalpha(*p)) p++
/* Advance p past any whitespace characters. */
#define EATSPACE(p) while(isspace(*p)) p++
/* Like EATNONALPHA, but copies each skipped character through CONVERT_STATE_1. */
#define EATNONALPHACOPY(p) while(!isalpha(*p)) {CONVERT_STATE_1(p)}
/* Like EATSPACE, but copies each skipped character through CONVERT_STATE_1. */
#define EATSPACECOPY(p) while(isspace(*p)) {CONVERT_STATE_1(p)}
/* One input file discovered by getFiles(). */
struct FIELD {
int id;            /* set by getFiles() to the entry's index plus 11 */
char name[512];    /* path of the file as printed by the `find` command */
unsigned int crc;  /* value returned by crc_file() for this path */
long length;       /* not assigned by any code in this listing */
};
/* Table of discovered input files, filled by getFiles(); capacity is 128 entries. */
static struct FIELD files[128];
/* Number of entries of files[] currently in use. */
static int fileid = 0;
/* Boundary strings; not referenced by any code visible in this listing. */
const char *BOUNDARYDECL="----=_NextPart_000_0076_01C29953.BE473C30";
const char *BOUNDARY="------=_NextPart_000_0076_01C29953.BE473C30";
/* Forward declaration: the CONVERT_STATE_* macros call CONVERT() before its definition. */
char *CONVERT(char p);
/*
 * Classify a path as relative or absolute.
 *
 * Returns  1 when the path starts with a letter or digit, or with "./" or ".\";
 *          0 when the path starts with '/';
 *         -1 for anything else (this includes the empty string and paths
 *            starting with "../").
 */
short PathIsRelative(char *path)
{
    /* <ctype.h> functions require a value representable as unsigned char;
     * a plain char may be negative for bytes >= 0x80. */
    if(isalnum((unsigned char)path[0]))
    {
        return 1;
    }
    else if(path[0] == '.' && (path[1] == '/' || path[1] == '\\'))
    {
        return 1;
    }
    else if(path[0] == '/')
    {
        return 0;
    }
    else
    {
        return -1;
    }
}
/*
 * Duplicate at most `len` leading characters of `path`.
 *
 * The result is a malloc'd buffer of len+1 bytes: the copied characters,
 * zero padding if `path` is shorter than `len`, and a final terminator.
 * The caller owns the buffer and must free it.
 */
char *getPath(const char *path, int len)
{
    char *copy = malloc(len+1);
    int k = 0;

    /* Copy characters until the source ends or the limit is reached. */
    for (; k < len && path[k] != '\0'; k++)
        copy[k] = path[k];
    /* Zero-fill whatever is left of the first len bytes. */
    for (; k < len; k++)
        copy[k] = 0x00;
    copy[len] = 0x00;
    return copy;
}
/*
 * Report the size of a file in bytes as given by stat().
 * Returns 0 when stat() fails, so a missing file and an empty file are
 * indistinguishable to the caller.
 */
unsigned long get_file_size(const char *filename)
{
    struct stat info;

    return stat(filename, &info) < 0 ? 0 : (unsigned long)info.st_size;
}
/*
 * Extract the text after the last '.' of a file name.
 *
 * Returns a malloc'd 32-byte buffer (caller frees) holding either the
 * suffix, or the literal "NO RESULT" when the name has no '.' or the
 * suffix would not fit (32 characters or more). Earlier dots are never a
 * fallback: anything following them is at least as long as the text after
 * the last dot.
 */
char *get_file_suffix(char *const filename)
{
    char *result = malloc(32);
    const char *dot;

    strcpy(result, "NO RESULT");
    dot = strrchr(filename, '.');
    if (dot != NULL && strlen(dot + 1) < 32)
    {
        strcpy(result, dot + 1);
    }
    return result;
}
/*
 * Build a "file:///" URL from a file system path.
 *
 * Backslashes are turned into forward slashes, and a single leading
 * separator ('/' or '\') is dropped so that an absolute path yields
 * "file:///tmp/x" rather than "file:////tmp/x".
 *
 * Returns a newly malloc'd, NUL-terminated string the caller must free, or
 * NULL if `file` is NULL or memory cannot be allocated.
 */
char *locationFromPath(const char *file)
{
    static const char prefix[] = "file:///";
    const size_t prefix_len = sizeof prefix - 1;
    size_t len;
    size_t i;
    char *url;

    if (file == NULL)
        return NULL;

    /* The prefix already ends with the root slash; skip one leading separator. */
    if (*file == '/' || *file == '\\')
        file++;

    len = strlen(file);
    url = malloc(prefix_len + len + 1);
    if (url == NULL)
        return NULL;

    memcpy(url, prefix, prefix_len);
    for (i = 0; i < len; i++)
        url[prefix_len + i] = (file[i] == '\\') ? '/' : file[i];
    url[prefix_len + len] = '\0';
    return url;
}
/*
 * Build one MIME part that carries a file as Base64.
 *
 * file     - path of the file to embed
 * boundary - MIME boundary string written after the leading "--"
 *
 * Returns a newly malloc'd string containing the part headers and the
 * Base64 body (caller frees), or NULL if the file cannot be opened or
 * memory cannot be allocated.
 *
 * NOTE(review): the Base64 body is emitted as one long line; MIME limits
 * encoded lines to 76 characters, so strict readers may reject it.
 */
char *processFile(const char *file, char *boundary)
{
    static const char fmt[] = "--%s\r\nContent-Type: application/octet-stream;\r\nContent-Transfer-Encoding: base64\r\nContent-Location: %s\r\n\r\n%s\r\n\r\n";
    FILE *fp = NULL;
    char *raw = NULL;
    char *encoded = NULL;
    char *locfile = NULL;
    char *part = NULL;
    long fsize = get_file_size(file);
    size_t nread;
    int need;

    if (fsize < 0)
        return NULL;

    /* Zeroed, with slack after the data so an encoder that looks ahead
     * within a 3-byte group stays inside the allocation. */
    raw = calloc((size_t)fsize + 3, 1);
    if (raw == NULL)
        return NULL;

    fp = fopen(file, "rb");
    if(fp == NULL)
    {
        free(raw);
        return NULL;
    }
    nread = fread(raw, 1, (size_t)fsize, fp);
    fclose(fp);

    /* Encode only what was actually read, not the size stat() reported. */
    encoded = (char *)base64_encode((unsigned char *)raw, (long)nread);
    free(raw);
    if (encoded == NULL)
        return NULL;

    locfile = locationFromPath(file);
    if (locfile != NULL)
    {
        /* Measure first so the output buffer is exactly large enough. */
        need = snprintf(NULL, 0, fmt, boundary, locfile, encoded);
        if (need >= 0)
        {
            part = malloc((size_t)need + 1);
            if (part != NULL)
                snprintf(part, (size_t)need + 1, fmt, boundary, locfile, encoded);
        }
    }

    free(locfile);
    free(encoded);
    return part;
}
/*
 * Build one MIME part that carries a text file verbatim.
 *
 * file     - path of the text file to embed
 * boundary - MIME boundary string written after the leading "--"
 *
 * Returns a newly malloc'd string containing the part headers and the file
 * contents (caller frees), or NULL if the file cannot be opened or memory
 * cannot be allocated.
 *
 * The part is labelled "Content-Type: text/plain" with
 * "Content-Transfer-Encoding: 8bit": a media type is not a valid transfer
 * encoding, and the body is copied unencoded. The body is inserted with
 * %s, so a file containing a NUL byte is truncated at that byte.
 */
char *processText(const char *file, char *boundary)
{
    static const char fmt[] = "--%s\r\nContent-Type: text/plain\r\nContent-Transfer-Encoding: 8bit\r\nContent-Location: %s\r\n\r\n%s\r\n\r\n";
    FILE *fp = NULL;
    char *text = NULL;
    char *locfile = NULL;
    char *part = NULL;
    long fsize = get_file_size(file);
    size_t nread;
    int need;

    if (fsize < 0)
        return NULL;

    /* calloc leaves a terminating NUL after whatever fread delivers. */
    text = calloc((size_t)fsize + 1, 1);
    if (text == NULL)
        return NULL;

    fp = fopen(file, "rb");
    if(fp == NULL)
    {
        free(text);
        return NULL;
    }
    nread = fread(text, 1, (size_t)fsize, fp);
    fclose(fp);
    text[nread] = '\0';

    locfile = locationFromPath(file);
    if (locfile != NULL)
    {
        /* Measure first so the output buffer is exactly large enough. */
        need = snprintf(NULL, 0, fmt, boundary, locfile, text);
        if (need >= 0)
        {
            part = malloc((size_t)need + 1);
            if (part != NULL)
                snprintf(part, (size_t)need + 1, fmt, boundary, locfile, text);
        }
    }

    free(locfile);
    free(text);
    return part;
}
/* Growable output buffer for the rewritten HTML body; `data` is kept
 * NUL-terminated after every append. */
struct html_out {
    char *data;
    size_t len;
    size_t cap;
};

/* Append n bytes of s to out, growing the buffer when needed.
 * Returns 0 on success, -1 if memory cannot be allocated. */
static int html_out_append(struct html_out *out, const char *s, size_t n)
{
    size_t need = out->len + n + 1;

    if (need > out->cap)
    {
        size_t ncap = out->cap ? out->cap : 256;
        char *grown;

        while (ncap < need)
            ncap *= 2;
        grown = realloc(out->data, ncap);
        if (grown == NULL)
            return -1;
        out->data = grown;
        out->cap = ncap;
    }
    if (n > 0)
        memcpy(out->data + out->len, s, n);
    out->len += n;
    out->data[out->len] = '\0';
    return 0;
}

/* Append the characters in [from, to) with the same substitutions CONVERT()
 * performs: '\n' becomes "=20", '=' becomes "=3D", NUL bytes are dropped and
 * everything else is copied unchanged. Returns 0 on success, -1 on failure. */
static int html_out_copy(struct html_out *out, const char *from, const char *to)
{
    for (; from < to; from++)
    {
        int rc;

        if (*from == '\n')
            rc = html_out_append(out, "=20", 3);
        else if (*from == '=')
            rc = html_out_append(out, "=3D", 3);
        else if (*from == '\0')
            rc = 0;
        else
            rc = html_out_append(out, from, 1);
        if (rc != 0)
            return -1;
    }
    return 0;
}

/* Bounded substring search: first occurrence of needle inside [from, to),
 * or NULL when there is none. */
static const char *html_find(const char *from, const char *to, const char *needle)
{
    size_t n = strlen(needle);

    for (; (size_t)(to - from) >= n; from++)
    {
        if (memcmp(from, needle, n) == 0)
            return from;
    }
    return NULL;
}

/* For the tag starting at `tag` (which points at '<'), return the name of
 * the attribute whose value must be rewritten: "href" for <a>/<A>/<link>,
 * "src" for <img>/<frame>, NULL for every other tag. */
static const char *html_link_attr(const char *tag)
{
    if (((tag[1] == 'a' || tag[1] == 'A') && !isalpha((unsigned char)tag[2])) ||
        (strncmp(tag + 1, "link", 4) == 0 && !isalpha((unsigned char)tag[5])))
        return "href";
    if ((strncmp(tag + 1, "img", 3) == 0 && isspace((unsigned char)tag[4])) ||
        (strncmp(tag + 1, "frame", 5) == 0 && isspace((unsigned char)tag[6])))
        return "src";
    return NULL;
}

/* Locate the value of attribute `attr` inside the tag [tag, tag_end).
 * A value in double quotes runs to the closing quote; any other value runs
 * to the first space or to the end of the tag. On success stores the value
 * range in *vbeg / *vend and returns 1; returns 0 if the attribute, its
 * '=' or its closing quote is not found inside the tag. */
static int html_attr_value(const char *tag, const char *tag_end, const char *attr,
                           const char **vbeg, const char **vend)
{
    const char *name = html_find(tag, tag_end, attr);
    const char *eq;
    const char *v;
    const char *e;

    if (name == NULL)
        return 0;
    eq = memchr(name, '=', (size_t)(tag_end - name));
    if (eq == NULL)
        return 0;

    v = eq + 1;
    while (v < tag_end && isspace((unsigned char)*v))
        v++;

    if (v < tag_end && *v == '\"')
    {
        v++;
        e = memchr(v, '\"', (size_t)(tag_end - v));
        if (e == NULL)
            return 0;
    }
    else
    {
        e = v;
        while (e < tag_end && *e != ' ')
            e++;
    }

    *vbeg = v;
    *vend = e;
    return 1;
}

/*
 * Build one MIME part that carries an HTML file, rewriting link targets.
 *
 * file     - path of the HTML file to embed
 * boundary - MIME boundary string written after the leading "--"
 *
 * The file is copied character by character with the CONVERT()
 * substitutions. For <a>, <link>, <img> and <frame> tags the value of the
 * href/src attribute is replaced by locationFromPath() of that value. The
 * attribute is searched only inside the tag itself (up to its '>'); tags
 * without the attribute, closing tags and all other tags are copied as is.
 * A '<' with no following '>' causes the remainder of the file to be
 * copied unchanged.
 *
 * Returns a newly malloc'd string with the part headers and converted body
 * (caller frees), or NULL if the file cannot be opened or memory cannot be
 * allocated.
 *
 * NOTE(review): every attribute value is rewritten, including absolute
 * URLs such as "http://..." -- confirm whether those should be left alone.
 * NOTE(review): "=20" is the quoted-printable code for a space, so line
 * breaks in the source are not preserved -- confirm this is intended.
 */
char *processHTML(const char *file, char *boundary)
{
    static const char fmt[] = "--%s\r\nContent-Type: text/html;\r\nContent-Transfer-Encoding: quoted-printable\r\nContent-Location: %s\r\n\r\n%s\r\n\r\n";
    struct html_out out = { NULL, 0, 0 };
    FILE *fp = NULL;
    char *buffer1 = NULL;
    char *buffer3 = NULL;
    char *locfile = NULL;
    const char *p;
    const char *end;
    long fsize = get_file_size(file);
    size_t nread;
    int need;

    if (fsize < 0)
        return NULL;

    buffer1 = calloc((size_t)fsize + 1, 1);
    if (buffer1 == NULL)
        return NULL;

    fp = fopen(file, "rb");
    if(fp == NULL)
    {
        free(buffer1);
        return NULL;
    }
    nread = fread(buffer1, 1, (size_t)fsize, fp);
    fclose(fp);

    /* Guarantee out.data is a valid empty string even for an empty file. */
    if (html_out_append(&out, "", 0) != 0)
        goto done;

    end = buffer1 + nread;
    p = buffer1;
    while (p < end)
    {
        const char *tag_end;
        const char *attr;
        const char *vbeg = NULL;
        const char *vend = NULL;
        const char *resume;

        if (*p != '<')
        {
            if (html_out_copy(&out, p, p + 1) != 0)
                goto done;
            p++;
            continue;
        }

        tag_end = memchr(p, '>', (size_t)(end - p));
        if (tag_end == NULL)
        {
            /* Unterminated tag: copy what is left and stop scanning. */
            if (html_out_copy(&out, p, end) != 0)
                goto done;
            break;
        }

        resume = p;
        attr = (p[1] == '/') ? NULL : html_link_attr(p);
        if (attr != NULL && html_attr_value(p, tag_end, attr, &vbeg, &vend))
        {
            char *tmppath = getPath(vbeg, (int)(vend - vbeg));
            char *loc = tmppath ? locationFromPath(tmppath) : NULL;
            int failed = (loc == NULL)
                || html_out_copy(&out, p, vbeg) != 0
                || html_out_append(&out, loc, strlen(loc)) != 0;

            free(tmppath);
            free(loc);
            if (failed)
                goto done;
            /* Continue after the original value (closing quote onwards). */
            resume = vend;
        }

        if (html_out_copy(&out, resume, tag_end + 1) != 0)
            goto done;
        p = tag_end + 1;
    }

    locfile = locationFromPath(file);
    if (locfile == NULL)
        goto done;

    /* Size the result from the converted body, which can be up to three
     * times larger than the input file. */
    need = snprintf(NULL, 0, fmt, boundary, locfile, out.data);
    if (need < 0)
        goto done;
    buffer3 = malloc((size_t)need + 1);
    if (buffer3 != NULL)
        snprintf(buffer3, (size_t)need + 1, fmt, boundary, locfile, out.data);

done:
    free(locfile);
    free(out.data);
    free(buffer1);
    return buffer3;
}
/*
 * Translate one input character into its output text.
 *
 * '\n' becomes "=20", '=' becomes "=3D", any other character is returned
 * as a one-character string (a NUL input yields an empty string).
 * The result is a zero-filled, malloc'd 5-byte buffer the caller must free.
 *
 * NOTE(review): "=20" is the quoted-printable code for a space, not for a
 * line break -- confirm this mapping is intended.
 */
char *CONVERT(char p)
{
    char *out = malloc(5);

    memset(out, 0x00, 5);
    switch (p)
    {
    case '\n':
        memcpy(out, "=20", 3);
        break;
    case '=':
        memcpy(out, "=3D", 3);
        break;
    default:
        out[0] = p;
        break;
    }
    return out;
}
/*
 * Turn one file into a MIME part, choosing the handler by file suffix.
 *
 * htm/html/shtm/phtm go to processHTML(), cpp/hpp/h/c/inl/txt go to
 * processText(), everything else goes to processFile(). The suffix
 * comparison is case-sensitive. Returns whatever the chosen handler
 * returns.
 */
char *process(const char *file, char *boundary)
{
    static const char *const html_suffixes[] = { "htm", "html", "shtm", "phtm" };
    static const char *const text_suffixes[] = { "cpp", "hpp", "h", "c", "inl", "txt" };
    char *ext = get_file_suffix((char *const)file);
    int is_html = 0;
    int is_text = 0;
    size_t k;

    for (k = 0; k < sizeof html_suffixes / sizeof html_suffixes[0]; k++)
    {
        if (strcmp(ext, html_suffixes[k]) == 0)
            is_html = 1;
    }
    for (k = 0; k < sizeof text_suffixes / sizeof text_suffixes[0]; k++)
    {
        if (strcmp(ext, text_suffixes[k]) == 0)
            is_text = 1;
    }

    /* The suffix is only needed for the decision above. */
    free(ext);

    if (is_html)
        return processHTML(file, boundary);
    if (is_text)
        return processText(file, boundary);
    return processFile(file, boundary);
}
/*
 * Fill the global files[] table with every regular file below `dir`.
 *
 * dir  - directory handed to the external `find` command
 * file - unused
 *
 * Runs "find <dir> -type f | sort" through popen() and records one entry
 * per output line: id (index + 11), CRC from crc_file(), and the path.
 * Entries are appended at files[fileid]; fileid is advanced. Paths that do
 * not fit in the name field are skipped, and scanning stops once the table
 * is full; both cases are reported on stderr.
 *
 * NOTE(review): `dir` is pasted into a shell command unquoted, so shell
 * metacharacters in it are interpreted by the shell. Only pass trusted
 * values, or replace the popen() call with a directory walk.
 */
void getFiles(char *dir, struct FIELD **file)
{
    const size_t max_files = sizeof files / sizeof files[0];
    FILE *fp = NULL;
    char cmdline[512];
    char buffer[1024];
    int n;

    (void)file;

    if (dir == NULL)
    {
        return;
    }

    n = snprintf(cmdline, sizeof cmdline, "find %s -type f | sort", dir);
    if (n < 0 || (size_t)n >= sizeof cmdline)
    {
        fprintf(stderr, "directory name too long\n");
        return;
    }

    fp = popen(cmdline, "r");
    if(fp == NULL)
    {
        return;
    }

    /* Loop on fgets itself; testing feof() first would process a stale
     * buffer after the final read fails. */
    while (fgets(buffer, sizeof buffer, fp) != NULL)
    {
        size_t len = strlen(buffer);

        if (len > 0 && buffer[len-1] == '\n')
            buffer[--len] = 0x00;
        if (len == 0)
            continue;

        if ((size_t)fileid >= max_files)
        {
            fprintf(stderr, "too many files, ignoring the rest\n");
            break;
        }
        if (len >= sizeof files[fileid].name)
        {
            fprintf(stderr, "path too long, skipped: %s\n", buffer);
            continue;
        }

        files[fileid].id = fileid+11;
        files[fileid].crc = crc_file(buffer);
        strcpy(files[fileid].name, buffer);
        fileid++;
    }

    /* Streams opened with popen() must be closed with pclose(). */
    pclose(fp);
}
int main(int argc, char **argv)
{
struct FIELD file;
char boundary[512];
char *s = NULL;//process(argv[1]);
int i = 0;
char t[1024];
memset(&files, 0x00, sizeof(files));
memset(&file, 0x00, sizeof(file));
memset(t, 0x00, sizeof(t));
getFiles(argv[1], NULL);
sprintf(boundary, "----=_NextPart_000_0001_01C29953.BE473C30");
strcat(t, "MIME-version: 1.0\r\nContent-Type: multipart/related;\r\n\tboundary=\"");
strcat(t, boundary);
strcat(t, "\";\r\n\ttype=\"multipart/alternative\"\r\nX-MimeOLE: Produced By Microsoft MimeOLE V6.00.2800.1106\r\n\r\nThis is a multi-part message in MIME format.\r\n\r\n");
printf("%s", t);
printf("--%s\r\n", boundary);
sprintf(boundary, "----=_NextPart_000_0011_01C29953.BE473C30");
printf("Content-Type: multipart/alternative;\r\n\tboundary=\"%s\"\r\n\r\n", boundary);
for(i=0; i<fileid; i++)
{
//printf("file=%s\n", files[i].name);
sprintf(boundary, "----=_NextPart_000_%04d_01C29953.BE473C30", 11);
s = process(files[i].name, boundary);
printf("%s", s);
free(s);
}
printf("--%s--\r\n\r\n", boundary);
sprintf(boundary, "----=_NextPart_000_0001_01C29953.BE473C30");
printf("--%s--\r\n", boundary);
return 0;
}
6.编译源码
$ gcc -o htmltomht main.c crc.c base64.c
7.运行程序
$ ./htmltomht test > test.mht