您的位置:首页 > 运维架构 > Shell

erlang抽象码与basho的protobuf(四)代码生成原理之代码生成

2012-07-07 22:19 232 查看

上文介绍了protobuffs的语义分析过程,发现其收集了目标proto文件及其import文件的符号表,此处继续观察代码生成过程。

符号表的格式如下:

 

[

Message1 = {MessageName,

                      [

                         Field1 = {FieldId,

                                         FieldRule(required/optional/repeated/repeated_packed...),

                                         FieldType(ScalarType, EnumType,OtherType),

                                         FieldName,Other(DefaultValue)},

                         Field2,...,Fieldn]},

Message2,...Messagen,

Enum1 = {enum, EnumName, EnumValue, EnumAtom},

Enum2,...,Enumn

]。

首先来看头文件的生成。

protobuffs_compile.erl

 

%% Entry point of code generation for a single .proto file.
%%
%% The output base name is the proto file name without its ".proto" suffix,
%% with "_pb" appended. Imports are searched in "./", "src/" and any extra
%% directories given through the `imports_dir' option. The parsed definitions
%% are collected into messages and enums (see collect_full_messages/1 and
%% resolve_types/2, defined elsewhere) and handed to output_source/4, whose
%% result is returned.
generate_source(ProtoFile, Options) when is_list(ProtoFile) ->
    Basename = filename:basename(ProtoFile, ".proto") ++ "_pb",
    {ok, FirstParsed} = parse(ProtoFile),
    ExtraImportDirs = proplists:get_value(imports_dir, Options, []),
    Parsed = parse_imports(FirstParsed, ["./", "src/" | ExtraImportDirs]),
    Collected = collect_full_messages(Parsed),
    Messages = resolve_types(Collected#collected.msg, Collected#collected.enum),
    output_source(Basename, Messages, Collected#collected.enum, Options).

 

%% Write the generated header (.hrl) and source (.erl) files for one proto.
%%
%% The header path is `Basename ++ ".hrl"', placed under the directory given
%% by the `output_include_dir' option when present; the source path is built
%% the same way from `output_src_dir' and ".erl".
%% The source text is produced by loading the abstract code of
%% pokemon_pb.beam (looked up next to this module's own beam file), rewriting
%% it with filter_forms/5 and pretty-printing the resulting forms.
%% Returns the result of file:write_file/2.
output_source(Basename, Messages, Enums, Options) ->
    HeaderFile =
        case proplists:get_value(output_include_dir, Options) of
            undefined -> Basename ++ ".hrl";
            IncludeDir -> filename:join(IncludeDir, Basename) ++ ".hrl"
        end,
    error_logger:info_msg("Writing header file to ~p~n", [HeaderFile]),
    ok = write_header_include_file(HeaderFile, Messages),
    %% The template module must have been compiled with debug_info, otherwise
    %% the abstract_code chunk is absent and the match below fails.
    TemplateBeam = filename:dirname(code:which(?MODULE)) ++ "/pokemon_pb.beam",
    {ok, {_, [{abstract_code, {_, TemplateForms}}]}} =
        beam_lib:chunks(TemplateBeam, [abstract_code]),
    GeneratedForms = filter_forms(Messages, Enums, TemplateForms, Basename, []),
    SrcFile =
        case proplists:get_value(output_src_dir, Options) of
            undefined -> Basename ++ ".erl";
            SrcDir -> filename:join(SrcDir, Basename) ++ ".erl"
        end,
    error_logger:info_msg("Writing src file to ~p~n", [SrcFile]),
    SourceText = erl_prettypr:format(erl_syntax:form_list(GeneratedForms)),
    file:write_file(SrcFile, SourceText).

头文件的生成过程比较简单,仅仅是打开头文件,并向其内部写入message对应的record定义。

 

%% Write one `-record(...)' definition per message into the header file.
%%
%% `Basename' is passed directly to file:open/2, i.e. it is the path of the
%% header file to create. `Messages' is a list of `{Name, Fields}' pairs in
%% which every field is a 5-tuple; fields are sorted on their first element
%% and only the rule (2nd), name (4th) and default (5th) are used.
%% Record and field names are lower-cased. A message without fields writes
%% nothing. Returns the result of file:close/1.
write_header_include_file(Basename, Messages) ->
    {ok, FileRef} = file:open(Basename, [write]),
    WriteRecord =
        fun({Name, Fields}) ->
            Sorted = lists:keysort(1, Fields),
            case [{string:to_lower(FieldName), Rule, Default}
                  || {_, Rule, _, FieldName, Default} <- Sorted] of
                [] ->
                    ok;
                OutFields ->
                    io:format(FileRef, "-record(~s, {~n    ", [string:to_lower(Name)]),
                    Definitions = generate_field_definitions(OutFields),
                    %% One "~s" per field definition, separated by ",\n    ".
                    FormatString = string:join(["~s" || _ <- Definitions], ",~n    "),
                    io:format(FileRef, FormatString, Definitions),
                    io:format(FileRef, "~n}).~n~n", [])
            end
        end,
    %% Entries that are not 2-tuples are skipped, as a generator pattern would.
    lists:foreach(WriteRecord, [Msg || {_, _} = Msg <- Messages]),
    file:close(FileRef).

 

 

%% Turn `{Name, Rule, Default}' triples into the text of record field
%% definitions, preserving the input order.
generate_field_definitions(Fields) ->
    generate_field_definitions(Fields, []).

%% `Acc' holds already rendered definitions in reverse order; the rendered
%% `Fields' are appended after them.
generate_field_definitions(Fields, Acc) ->
    lists:reverse(Acc, [field_definition(Field) || Field <- Fields]).

%% Render a single field:
%%   required            -> field that raises `{required, Name}' when left unset
%%   any rule, no default -> bare field name
%%   optional + default   -> field initialised with the default (printed with ~p)
field_definition({Name, required, _}) ->
    lists:flatten(io_lib:format("~s = erlang:error({required, ~s})", [Name, Name]));
field_definition({Name, _, none}) ->
    lists:flatten(io_lib:format("~s", [Name]));
field_definition({Name, optional, Default}) ->
    lists:flatten(io_lib:format("~s = ~p", [Name, Default])).

可以看到头文件的生成仅仅是循规蹈矩的将message的每个域定义转换成对应的record的域定义。

接着来看源文件的生成,此处的代码才是protobuffs的精髓所在,前面的连篇累牍仅仅是铺垫。

 

%% Write the generated header (.hrl) and source (.erl) files for one proto.
%%
%% The header path is `Basename ++ ".hrl"', placed under the directory given
%% by the `output_include_dir' option when present; the source path is built
%% the same way from `output_src_dir' and ".erl".
%% The source text is produced by loading the abstract code of
%% pokemon_pb.beam (looked up next to this module's own beam file), rewriting
%% it with filter_forms/5 and pretty-printing the resulting forms.
%% Returns the result of file:write_file/2.
output_source(Basename, Messages, Enums, Options) ->
    HeaderFile =
        case proplists:get_value(output_include_dir, Options) of
            undefined -> Basename ++ ".hrl";
            IncludeDir -> filename:join(IncludeDir, Basename) ++ ".hrl"
        end,
    error_logger:info_msg("Writing header file to ~p~n", [HeaderFile]),
    ok = write_header_include_file(HeaderFile, Messages),
    %% The template module must have been compiled with debug_info, otherwise
    %% the abstract_code chunk is absent and the match below fails.
    TemplateBeam = filename:dirname(code:which(?MODULE)) ++ "/pokemon_pb.beam",
    {ok, {_, [{abstract_code, {_, TemplateForms}}]}} =
        beam_lib:chunks(TemplateBeam, [abstract_code]),
    GeneratedForms = filter_forms(Messages, Enums, TemplateForms, Basename, []),
    SrcFile =
        case proplists:get_value(output_src_dir, Options) of
            undefined -> Basename ++ ".erl";
            SrcDir -> filename:join(SrcDir, Basename) ++ ".erl"
        end,
    error_logger:info_msg("Writing src file to ~p~n", [SrcFile]),
    SourceText = erl_prettypr:format(erl_syntax:form_list(GeneratedForms)),
    file:write_file(SrcFile, SourceText).

注意此处的pokemon_pb.beam,莫名其妙的冒出个pokemon,确实很奇怪,口袋妖怪和protobuffs有什么关系呢?

beam_lib:chunks(BeamFile, [abstract_code])将beam文件还原成抽象码,抽象码的格式如http://www.erlang.org/doc/apps/erts/absform.html所述。

pokemon_pb.beam由pokemon_pb.erl编译生成,该文件是protobuffs用于生成其它proto对应的源文件的代码模板,其内部的主要结构如下:

pokemon_pb.erl

%% Placeholder record of the template module. filter_forms/5 replaces this
%% attribute with one record definition per message of the target proto.
-record(pikachu, {abc, def}).

 

%% Template for the per-message encode_<name>/1 entry point: checks that the
%% argument is a `pikachu' record and delegates to encode/2.
%% The code generator matches this function as a single-clause form and
%% substitutes the atom `pikachu' for each message name.
encode_pikachu(Record) when is_record(Record, pikachu) ->

    encode(pikachu, Record).

 

 

 

%% Template clause of encode/2: builds the iolist for the record via iolist/2
%% and flattens it into a binary. The generator emits one such clause per
%% message.
encode(pikachu, Record) ->

    iolist_to_binary(iolist(pikachu, Record)).

 

 

%% Template clause of iolist/2: returns a list with one pack/5 call per field.
%% The arguments of pack/5 here are the field number, the rule, the field
%% value passed through with_default/2, the type and a trailing [].
%% pack/5 and with_default/2 are defined elsewhere in the template module.
iolist(pikachu, Record) ->

    [pack(1, required, with_default(Record#pikachu.abc, none), string, [])].

 

 

%% Template clause mapping the enum pair (pikachu, value) to the integer 1.
%% NOTE(review): presumably replaced by one clause per enum value of the
%% target proto - confirm against the corresponding filter_forms/5 clause.
enum_to_int(pikachu,value) ->

    1.

 

%% Template clause of int_to_enum/2: ignores the first argument and returns
%% the value unchanged.
int_to_enum(_,Val) ->

    Val.

 

 

 

%% Template for the per-message decode_<name>/1 entry point: accepts a binary
%% and delegates to decode/2 with the message name `pikachu'.
decode_pikachu(Bytes) when is_binary(Bytes) ->

    decode(pikachu, Bytes).

 

%% Template clause of decode/2: decodes the binary with decode/3 using a
%% per-message field table and converts the result into a record with
%% to_record/2.
%% NOTE(review): each `Types' entry looks like
%% {FieldNumber, FieldName, FieldType, Options} - confirm against decode/3,
%% which is not shown here.
decode(pikachu, Bytes) when is_binary(Bytes) ->

    Types = [{1, abc, int32, []}, {2, def, double, []}],

    Decoded = decode(Bytes, Types, []),

    to_record(pikachu, Decoded).

 

 

%% Template clause of to_record/2: starts from a default #pikachu{} record
%% and folds over the decoded {FieldNumber, Name, Value} tuples, storing each
%% value with set_record_field/4 (defined elsewhere in the template module).
to_record(pikachu, DecodedTuples) ->

    lists:foldl(

        fun({_FNum, Name, Val}, Record) ->

            set_record_field(record_info(fields, pikachu), Record, Name, Val)

        end, #pikachu{}, DecodedTuples).

看了这个套路,是不是有点眼熟呢,第一章的rds_la_pb.erl也是这个套路,而且,那个pikachu原来来自于这里,basho的开发者还真萌,模板都是用pikachu做的。

这个文件主要部分的抽象码如下:

{attribute,1,file,{"src/pokemon_pb.erl",1}},

{attribute,25,module,pokemon_pb},

{attribute,26,export,[{encode_pikachu,1},{decode_pikachu,1}]},

{attribute,28,record,{pikachu,[{record_field,28,{atom,28,abc}},{record_field,28,{atom,28,def}}]}},

 

{function,34,encode_pikachu,1,

[{clause,34,

   [{var,34,'Record'}],

   [[{call,34,

{atom,34,is_record},

[{var,34,'Record'},{atom,34,pikachu}]}]],

   [{call,35,

{atom,35,encode},

[{atom,35,pikachu},{var,35,'Record'}]}]}]},

{function,37,encode,2,

[{clause,37,

   [{atom,37,pikachu},{var,37,'Record'}],

   [],

   [{call,38,

{atom,38,iolist_to_binary},

[{call,38,

    {atom,38,iolist},

    [{atom,38,pikachu},{var,38,'Record'}]}]}]}]},

 

{function,40,iolist,2,

[{clause,40,

   [{atom,40,pikachu},{var,40,'Record'}],

   [],

   [{cons,41,

{call,41,

   {atom,41,pack},

   [{integer,41,1},

    {atom,41,required},

    {call,41,

{atom,41,with_default},

[{record_field,41,{var,41,'Record'},pikachu,{atom,41,abc}},

 {atom,41,none}]},

    {atom,41,string},

    {nil,41}]},

{nil,41}}]}]},

 

{function,80,enum_to_int,2,

[{clause,80,

   [{atom,80,pikachu},{atom,80,value}],

   [],

   [{integer,81,1}]}]},

{function,83,int_to_enum,2,

[{clause,83,

   [{var,83,'_'},{var,83,'Val'}],

   [],

   [{var,84,'Val'}]}]},

{function,87,decode_pikachu,1,

[{clause,87,

   [{var,87,'Bytes'}],

   [[{call,87,{atom,87,is_binary},[{var,87,'Bytes'}]}]],

   [{call,88,

{atom,88,decode},

[{atom,88,pikachu},{var,88,'Bytes'}]}]}]},

{function,90,decode,2,

[{clause,90,

   [{atom,90,pikachu},{var,90,'Bytes'}],

   [[{call,90,{atom,90,is_binary},[{var,90,'Bytes'}]}]],

   [{match,91,

{var,91,'Types'},

{cons,91,

   {tuple,91,

[{integer,91,1},{atom,91,abc},{atom,91,int32},{nil,91}]},

   {cons,91,

{tuple,91,

   [{integer,91,2},{atom,91,def},{atom,91,double},{nil,91}]},

{nil,91}}}},

    {match,92,

{var,92,'Decoded'},

{call,92,

   {atom,92,decode},

   [{var,92,'Bytes'},{var,92,'Types'},{nil,92}]}},

    {call,93,

{atom,93,to_record},

[{atom,93,pikachu},{var,93,'Decoded'}]}]}]},

 

{function,136,to_record,2,

[{clause,136,

   [{atom,136,pikachu},{var,136,'DecodedTuples'}],

   [],

   [{call,137,

{remote,137,{atom,137,lists},{atom,137,foldl}},

[{'fun',138,

    {clauses,

[{clause,138,

     [{tuple,138,

  [{var,138,'_FNum'},{var,138,'Name'},{var,138,'Val'}]},

      {var,138,'Record'}],

     [],

     [{call,139,

  {atom,139,set_record_field},

  [{call,139,

{atom,139,record_info},

[{atom,139,fields},{atom,139,pikachu}]},

   {var,139,'Record'},

   {var,139,'Name'},

   {var,139,'Val'}]}]}]}},

{record,140,pikachu,[]},

{var,140,'DecodedTuples'}]}]}]},

这个文件的抽象码中还有很多部分,但此处仅列出这些部分,因为这些部分是和接下来的源文件生成息息相关的部分。

 

%% Write the generated header (.hrl) and source (.erl) files for one proto.
%%
%% The header path is `Basename ++ ".hrl"', placed under the directory given
%% by the `output_include_dir' option when present; the source path is built
%% the same way from `output_src_dir' and ".erl".
%% The source text is produced by loading the abstract code of
%% pokemon_pb.beam (looked up next to this module's own beam file), rewriting
%% it with filter_forms/5 and pretty-printing the resulting forms.
%% Returns the result of file:write_file/2.
output_source(Basename, Messages, Enums, Options) ->
    HeaderFile =
        case proplists:get_value(output_include_dir, Options) of
            undefined -> Basename ++ ".hrl";
            IncludeDir -> filename:join(IncludeDir, Basename) ++ ".hrl"
        end,
    error_logger:info_msg("Writing header file to ~p~n", [HeaderFile]),
    ok = write_header_include_file(HeaderFile, Messages),
    %% The template module must have been compiled with debug_info, otherwise
    %% the abstract_code chunk is absent and the match below fails.
    TemplateBeam = filename:dirname(code:which(?MODULE)) ++ "/pokemon_pb.beam",
    {ok, {_, [{abstract_code, {_, TemplateForms}}]}} =
        beam_lib:chunks(TemplateBeam, [abstract_code]),
    GeneratedForms = filter_forms(Messages, Enums, TemplateForms, Basename, []),
    SrcFile =
        case proplists:get_value(output_src_dir, Options) of
            undefined -> Basename ++ ".erl";
            SrcDir -> filename:join(SrcDir, Basename) ++ ".erl"
        end,
    error_logger:info_msg("Writing src file to ~p~n", [SrcFile]),
    SourceText = erl_prettypr:format(erl_syntax:form_list(GeneratedForms)),
    file:write_file(SrcFile, SourceText).

接下来的部分,就是protobuffs处理抽象码的技巧,它将这些与特定proto文件相关的代码部分,用之前获得的类型符号表定义进行替换,从而生成新的,特定于proto文件的抽象码,然后利用erl_prettypr:format(erl_syntax:form_list (Forms1))的调用过程,由抽象码直接生成源文件,这样极大地简化了代码生成过程,而且最大限度的复用了pokemon_pb.erl的其它函数。

 

%% `file' attribute: point it at "src/<Basename>.erl" instead of the template
%% source; the line number of the attribute is reused inside the file tuple.
filter_forms(Msgs, Enums, [{attribute, Line, file, {_, _}} | Rest], Basename, Acc) ->
    FileAttr = {attribute, Line, file, {"src/" ++ Basename ++ ".erl", Line}},
    filter_forms(Msgs, Enums, Rest, Basename, [FileAttr | Acc]);

这里是对文件名属性的替换;

 

%% `module' attribute: rename the template module `pokemon_pb' to the atom
%% made from Basename.
filter_forms(Msgs, Enums, [{attribute, Line, module, pokemon_pb} | Rest], Basename, Acc) ->
    ModuleAttr = {attribute, Line, module, list_to_atom(Basename)},
    filter_forms(Msgs, Enums, Rest, Basename, [ModuleAttr | Acc]);

这里是对模块名的替换,模块名和文件名均来自于Basename,即proto文件的文件名去掉.proto后缀再加上_pb(见generate_source),而不是proto文件中的package定义;

 

%% `export' attribute: replace the template's encode_pikachu/1 and
%% decode_pikachu/1 with an encode_<name>/1, decode_<name>/1 pair for every
%% message (names lower-cased). Pairs appear in reverse message order.
filter_forms(Msgs, Enums,
             [{attribute, Line, export, [{encode_pikachu, 1}, {decode_pikachu, 1}]} | Rest],
             Basename, Acc) ->
    Exports = lists:flatmap(
                fun({Name, _}) ->
                        Lower = string:to_lower(Name),
                        [{list_to_atom("encode_" ++ Lower), 1},
                         {list_to_atom("decode_" ++ Lower), 1}]
                end,
                lists:reverse(Msgs)),
    filter_forms(Msgs, Enums, Rest, Basename, [{attribute, Line, export, Exports} | Acc]);

这里是对导出函数的替换,可以看到,新的导出函数来源于proto文件的message名字;

 

%% `record' attribute: replace the `pikachu' record with one record per
%% message. Fields are sorted on the first element of their 5-tuple and their
%% names (4th element) are lower-cased before being turned into atoms.
filter_forms(Msgs, Enums, [{attribute, Line, record, {pikachu, _}} | Rest], Basename, Acc) ->
    RecordForm =
        fun(Name, Fields) ->
                FieldNames = [string:to_lower(FieldName)
                              || {_, _, _, FieldName, _} <- lists:keysort(1, Fields)],
                FieldForms = [{record_field, Line, {atom, Line, list_to_atom(FieldName)}}
                              || FieldName <- FieldNames],
                {attribute, Line, record, {atomize(Name), FieldForms}}
        end,
    Records = [RecordForm(Name, Fields) || {Name, Fields} <- Msgs],
    filter_forms(Msgs, Enums, Rest, Basename, Records ++ Acc);

这里是对record定义的替换,可以看到,record的定义来自于proto文件的message的各个域,域的名字将被转换为小写;

 

%% encode_pikachu/1: emit one encode_<name>/1 function per message by copying
%% the template clause and replacing every `pikachu' in it with the message's
%% atom. Only a single-clause template function is matched.
filter_forms(Msgs, Enums, [{function, Line, encode_pikachu, 1, [Clause]} | Rest], Basename, Acc) ->
    EncodeFun =
        fun(Name) ->
                FunName = list_to_atom("encode_" ++ string:to_lower(Name)),
                {function, Line, FunName, 1, [replace_atom(Clause, pikachu, atomize(Name))]}
        end,
    Functions = [EncodeFun(Name) || {Name, _} <- Msgs],
    filter_forms(Msgs, Enums, Rest, Basename, Functions ++ Acc);

 

%% Return a copy of `Term' in which every subterm exactly equal to `Find' is
%% replaced by `Replace'. Tuples and lists are traversed recursively; any
%% other term is returned unchanged. Despite the name, the comparison is on
%% whole terms, so `Find' does not have to be an atom.
replace_atom(Term, Find, Replace) when Term =:= Find ->
    Replace;
replace_atom(Term, Find, Replace) when is_list(Term) ->
    [replace_atom(Element, Find, Replace) || Element <- Term];
replace_atom(Term, Find, Replace) when is_tuple(Term) ->
    Elements = [replace_atom(Element, Find, Replace) || Element <- tuple_to_list(Term)],
    list_to_tuple(Elements);
replace_atom(Term, _Find, _Replace) ->
    Term.

 

这里是对encode_xxx/1函数的替换,可以看到,对于每一个message,都将有一个encode_xxx/1函数,这个函数的生成过程非常简单,仅仅是替换了函数的名字和对encode/2的调用参数的值;

 

%% encode/2: replace the single template clause with one clause per message,
%% built by expand_encode_function/3.
filter_forms(Msgs, Enums, [{function, Line, encode, 2, [Clause]} | Rest], Basename, Acc) ->
    EncodeFunction = expand_encode_function(Msgs, Line, Clause),
    filter_forms(Msgs, Enums, Rest, Basename, [EncodeFunction | Acc]);

 

%% Build the abstract form of encode/2 with one clause per message, each
%% derived from the template clause by filter_encode_clause/2.
expand_encode_function(Msgs, Line, Clause) ->
    Clauses = lists:map(fun(Msg) -> filter_encode_clause(Msg, Clause) end, Msgs),
    {function, Line, encode, 2, Clauses}.

 

%% Build the abstract form of the clause
%%     encode(<msg>, Record) -> iolist_to_binary(iolist(<msg>, Record))
%% for one message. Only the line number and the guards of the template
%% clause are reused; its arguments and body are rebuilt from scratch.
filter_encode_clause({MsgName, _Fields}, {clause, L, _Args, Guards, _Content}) ->
    MsgAtom = {atom, L, atomize(MsgName)},
    RecordVar = {var, L, 'Record'},
    IoListCall = {call, L, {atom, L, iolist}, [MsgAtom, RecordVar]},
    ToBin = {call, L, {atom, L, iolist_to_binary}, [IoListCall]},
    {clause, L, [MsgAtom, RecordVar], Guards, [ToBin]}.

 

 

这里是对encode/2函数的替换,可以看到,对于每一个message,都将有一个encode(xxx,Record),不同的message,对应的clause子句不同,{atom,L,atomize(MsgName)}即为encode/2各个clause的第一个参数,同时也替换了encode/2调用iolist函数的第一个参数;

 

 filter_forms(Msgs, Enums, [{function,L,iolist,2,[Clause]}|Tail], Basename, Acc) ->

     filter_forms(Msgs, Enums, Tail, Basename, [expand_iolist_function(Msgs, L, Clause)|Acc]);

 

%% Build the abstract form of iolist/2 with one clause per message, each
%% derived from the template clause by filter_iolist_clause/2.
expand_iolist_function(Msgs, Line, Clause) ->
    Clauses = lists:map(fun(Msg) -> filter_iolist_clause(Msg, Clause) end, Msgs),
    {function, Line, iolist, 2, Clauses}.

%% Build the abstract form of the clause
%%     iolist(<msg>, Record) -> [pack(...), ...]
%% for one message. Every field `{FNum, Tag, SType, SName, Default}' becomes
%%     pack(FNum, Tag, with_default(Record#<msg>.<sname>, Default), <stype>, [])
%% Only the line number and the guards of the template clause are reused.
%% The cons chain is built by prepending inside a left fold, so the resulting
%% list holds the pack calls in the reverse order of `Fields'.
filter_iolist_clause({MsgName, Fields}, {clause, L, _Args, Guards, _Content}) ->
    RecordVar = {var, L, 'Record'},
    PackCall =
        fun({FNum, Tag, SType, SName, Default}) ->
                FieldAccess = {record_field, L, RecordVar, atomize(MsgName),
                               {atom, L, atomize(SName)}},
                %% The default value is embedded as a literal abstract term.
                Value = {call, L, {atom, L, with_default},
                         [FieldAccess, erl_parse:abstract(Default)]},
                {call, L, {atom, L, pack},
                 [{integer, L, FNum},
                  {atom, L, Tag},
                  Value,
                  {atom, L, atomize(SType)},
                  {nil, L}]}
        end,
    Cons = lists:foldl(
             fun(Field, Rest) -> {cons, L, PackCall(Field), Rest} end,
             {nil, L},
             Fields),
    {clause, L, [{atom, L, atomize(MsgName)}, RecordVar], Guards, [Cons]}.

这个函数的替换较为复杂,首先还原它的源码:

 

%% Source form of the template clause that filter_iolist_clause/2 rewrites:
%% a one-element list holding a single pack/5 call for field `abc'.
iolist(pikachu, Record) ->

    [pack(1, required, with_default(Record#pikachu.abc, none), string, [])].

这个函数保存了一个message的所有域定义,且会调用pack函数进行实际的类型编码,最终返回message对应的record的编码列表。对比以下第一章生成的代码:

 

%% Generated counterpart of the template clause for the `la_record' message:
%% one pack/5 call per field (numbers 1 to 5), each reading the matching
%% record field through with_default/2 with `none' as the default.
iolist(la_record, Record) ->

    [pack(1, required,

          with_default(Record#la_record.name, none), string, []),

     pack(2, required,

          with_default(Record#la_record.timestamp, none), int32,

          []),

     pack(3, required,

          with_default(Record#la_record.pb_query, none), string,

          []),

     pack(4, required,

          with_default(Record#la_record.query_time, none), int32,

          []),

     pack(5, required,

          with_default(Record#la_record.response_time, none),

          int32, [])].

可以看到,这个函数的替换仅仅是在返回值上做了手脚,扩展了返回列表,对于message的每一个域(不仅限于required的域,示例中恰好全部为required),都根据类型定义和域名字生成一个列表条目,并用于编码。

这样替换过程就好理解了,由于抽象码中将列表表示成一系列cons元组的嵌套,因此替换过程仅仅是遍历message的域定义,将域定义转换成调用pack函数的参数,然后用嵌套的cons元组连接起来,重新组成一个列表即可。这样就完成了语义上的转换,即为message的每个field调用pack进行编码,然后再组成一个列表,最后调用iolist_to_binary转换成binary,得到编码。

其它的函数替换也是类似的,要么是替换一个函数clause的参数名字,要么根据message的域定义生成一个cons元组,构建一个新的列表,此处就不再赘述了。

抽象码生成完毕,就可以使用erlang本身的功能,将抽象码转换成源码了。

前面做了如此多的铺垫,直到本章才扯清了protobuffs如何使用erlang的抽象码,读者们也可以发现,使用erlang构建编译器也不是很麻烦,仅仅需要一个leex定义,一个yecc定义,以及语义分析和目标代码生成文件即可,如果目标代码是erlang代码,那么目标代码生成过程可以进一步简化为对一个模板代码进行抽象码替换的过程,是不是很简单呢?

未完待续...

阅读更多
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: