您的位置:首页 > 理论基础 > 计算机网络

Linux企业级项目实践之网络爬虫(5)——处理配置文件

2014-08-28 01:31 691 查看
配置文件在Linux下使用得非常普遍,但是Linux下没有一个统一的配置文件标准。

我们把配置文件的规则制定如下:

1、把“#”视作注释开始

2、所有的配置项都是以键值对的形式出现

3、严格区分大小写

4、允许数据类型为整型的配置项

5、允许数据类型为字符串类型的配置项

6、允许数据类型为逻辑型的配置项,取值为yes或者no。

同时我们需要对配置文件做初始化和载入两个操作。

代码如下:

/* confparser.h */

#ifndef CONFPARSER_H
#define CONFPARSER_H

#include <vector>
using namespace std;

/* Maximum number of characters loadconfig() reads per configuration line
 * (its line buffer is one byte larger to hold the terminator). */
#define MAX_CONF_LEN  1024
/* Path of the configuration file opened by loadconfig(). */
#define CONF_FILE     "spider.conf"

/*
 * Runtime configuration of the spider.
 *
 * initconfig() supplies the defaults and loadconfig() overwrites each member
 * from the directive of the same name found in CONF_FILE; the `modules` list
 * is the exception and is fed by the "load_module" directive.  See the
 * configuration file itself for the meaning of each directive.
 *
 * All char * members are strdup()'ed copies made by loadconfig(), or NULL
 * when the directive was not present.
 */
typedef struct Config {
    int                  max_job_num;       /* default 10 */
    char                *seeds;             /* default NULL */
    char                *include_prefixes;  /* default NULL */
    char                *exclude_prefixes;  /* default NULL */
    char                *logfile;           /* default NULL */
    int                  log_level;         /* default 0 */
    int                  max_depth;         /* default INF (0x7FFFFFFF) */
    int                  make_hostdir;      /* yes/no directive converted with yesnotoi(); default 0 */
    int                  stat_interval;     /* default 0 */

    char                *module_path;       /* default NULL */
    std::vector<char *>  modules;           /* one entry per "load_module" line */
    std::vector<char *>  accept_types;      /* one entry per "accept_types" line */
} Config;   /* the alias name was missing, which made the typedef a no-op */

/* Allocate a Config and fill it with the built-in default values. */
extern Config * initconfig();

/* Read CONF_FILE and store the directives found there into *conf. */
extern void loadconfig(Config *conf);

#endif

/* confparser.c*/

#include <new>

#include "spider.h"
#include "qstring.h"
#include "confparser.h"

/* 0x7FFFFFFF; used by initconfig() as the default max_depth. */
#define INF 0x7FFFFFFF

/*
 * initconfig - allocate a Config and populate it with default values.
 *
 * The storage still comes from malloc(), but the object is now constructed
 * in place with placement-new.  Config contains std::vector members; raw
 * malloc() never runs their constructors, so the push_back() calls made
 * later by loadconfig() would operate on unconstructed objects.
 *
 * Returns the new Config, or NULL if the allocation fails.
 */
Config * initconfig()
{
    void *mem = malloc(sizeof(Config));
    if (mem == NULL) {
        return NULL;
    }

    /* Value-initialisation zeroes the scalars and constructs both vectors. */
    Config *conf = new (mem) Config();

    conf->max_job_num = 10;
    conf->seeds = NULL;
    conf->include_prefixes = NULL;
    conf->exclude_prefixes = NULL;
    conf->logfile = NULL;
    conf->log_level = 0;
    conf->max_depth = INF;
    conf->make_hostdir = 0;
    conf->module_path = NULL;
    conf->stat_interval = 0;
    /* conf->modules and conf->accept_types start out empty. */

    return conf;
}

/*
 * loadconfig - read CONF_FILE and store every directive it contains in *conf.
 *
 * Each line is trimmed with strim(); empty lines and lines starting with '#'
 * are skipped.  Every other line must split on '=' into exactly two parts
 * (key and value), otherwise parsing stops and an error is logged together
 * with the line number.  String values are copied with strdup(); integer
 * values are converted with atoi(); make_hostdir goes through yesnotoi().
 * "load_module" and "accept_types" may appear several times, each occurrence
 * appending one entry to the corresponding vector.
 *
 * NOTE(review): keys are compared with strcasecmp(), i.e. case-insensitively.
 * NOTE(review): argv returned by strsplit() is never released here; confirm
 * in qstring.h whether the caller is expected to free it.
 *
 * conf: configuration object to fill; must already be initialised.
 */
void loadconfig(Config *conf)
{
    /* All locals are declared up front so that `goto conferr` never jumps
     * across an initialisation. */
    FILE *fp = NULL;
    char buf[MAX_CONF_LEN+1];
    int argc = 0;
    char **argv = NULL;
    int linenum = 0;
    char *line = NULL;
    const char *err = NULL;

    if ((fp = fopen(CONF_FILE, "r")) == NULL) {
        SPIDER_LOG(SPIDER_LEVEL_ERROR, "Can't load conf_file %s", CONF_FILE);
        /* Do not fall through into fgets() with a NULL stream. */
        return;
    }

    while (fgets(buf, sizeof buf, fp) != NULL) {
        linenum++;
        line = strim(buf);

        /* Skip comments and blank lines. */
        if (line[0] == '#' || line[0] == '\0') {
            continue;
        }

        /* presumably the trailing 1 limits the number of splits — confirm in qstring.h */
        argv = strsplit(line, '=', &argc, 1);
        if (argc != 2) {
            err = "directive must be 'key=value'";
            goto conferr;
        }

        if (strcasecmp(argv[0], "max_job_num") == 0) {
            conf->max_job_num = atoi(argv[1]);
        } else if (strcasecmp(argv[0], "logfile") == 0) {
            conf->logfile = strdup(argv[1]);
        } else if (strcasecmp(argv[0], "include_prefixes") == 0) {
            conf->include_prefixes = strdup(argv[1]);
        } else if (strcasecmp(argv[0], "exclude_prefixes") == 0) {
            conf->exclude_prefixes = strdup(argv[1]);
        } else if (strcasecmp(argv[0], "seeds") == 0) {
            conf->seeds = strdup(argv[1]);
        } else if (strcasecmp(argv[0], "module_path") == 0) {
            conf->module_path = strdup(argv[1]);
        } else if (strcasecmp(argv[0], "load_module") == 0) {
            conf->modules.push_back(strdup(argv[1]));
        } else if (strcasecmp(argv[0], "log_level") == 0) {
            conf->log_level = atoi(argv[1]);
        } else if (strcasecmp(argv[0], "max_depth") == 0) {
            conf->max_depth = atoi(argv[1]);
        } else if (strcasecmp(argv[0], "stat_interval") == 0) {
            conf->stat_interval = atoi(argv[1]);
        } else if (strcasecmp(argv[0], "make_hostdir") == 0) {
            conf->make_hostdir = yesnotoi(argv[1]);
        } else if (strcasecmp(argv[0], "accept_types") == 0) {
            conf->accept_types.push_back(strdup(argv[1]));
        } else {
            err = "Unknown directive";
            goto conferr;
        }
    }

    fclose(fp);
    return;

conferr:
    /* Close the stream before logging, in case the logger does not return. */
    fclose(fp);
    SPIDER_LOG(SPIDER_LEVEL_ERROR, "Bad directive in %s[line:%d]%s", CONF_FILE, linenum, err);
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: