连载《Chrome V8 原理讲解》第九篇 Builtin源码分析

VSole2021-10-12 16:39:27

1 摘要

上一篇文章中,Builtin作为先导知识,我们做了宏观概括和介绍。Builtin(Built-in function)是编译好的内置代码块(chunk),存储在snapshot_blob.bin文件中,V8启动时以反序列化方式加载,运行时可以直接调用。Builtins功能共计600多个,细分为多个子类型,涵盖了解释器、字节码、执行单元等多个V8核心功能,本文从微观角度剖析Builtins功能的源码,在不使用snapshot_blob.bin文件的情况下,详细说明Builtin创建和运行过程。

本文内容组织结构:Builtin初始化过程(章节2),Builtin子类型讲解(章节3)。

2 Builtin初始化

下面是code类,它负责管理所有Builtin功能,是builtin table的数据类型。

1.  
  class 
  Code : 
  public HeapObject {
  
2.   
  public:
  
3.    NEVER_READ_ONLY_SPACE
  
4.    
  // Opaque data type for encapsulating code flags like kind, inline
5.    
  // cache state, and arguments count.
6.    
  using Flags = 
  uint32_t;
  
7.  
  #define CODE_KIND_LIST(V)   \
8.    V(OPTIMIZED_FUNCTION)     \
9.    V(BYTECODE_HANDLER)       \
10.   V(STUB)                   \
11.    V(BUILTIN)                \
12.    V(REGEXP)                 \
13.    V(WASM_FUNCTION)          \
14.    V(WASM_TO_CAPI_FUNCTION)  \
15.    V(WASM_TO_JS_FUNCTION)    \
16.    V(JS_TO_WASM_FUNCTION)    \
17.    V(JS_TO_JS_FUNCTION)      \
18.    V(WASM_INTERPRETER_ENTRY) \
19.    V(C_WASM_ENTRY)
20.    
  enum Kind {
  
21.  
  #define DEFINE_CODE_KIND_ENUM(name) name,
22.      CODE_KIND_LIST(DEFINE_CODE_KIND_ENUM)
  
23.  
  #undef DEFINE_CODE_KIND_ENUM
24.          NUMBER_OF_KINDS
  
25.    };
  
26.    
  static 
  const 
  char* 
  Kind2String(Kind kind);
  
27.    
  // Layout description.
28.  
  #define CODE_FIELDS(V)                                                    \
29.    V(kRelocationInfoOffset, kTaggedSize)                                   \
30.    V(kDeoptimizationDataOffset, kTaggedSize)                               \
31.    V(kSourcePositionTableOffset, kTaggedSize)                              \
32.    V(kCodeDataContainerOffset, kTaggedSize)                                \
33.    /* Data or code not directly visited by GC directly starts here. */     \
34.    /* The serializer needs to copy bytes starting from here verbatim. */   \
35.    /* Objects embedded into code is visited via reloc info. */             \
36.    V(kDataStart, 0)                                                        \
37.    V(kInstructionSizeOffset, kIntSize)                                     \
38.    V(kFlagsOffset, kIntSize)                                               \
39.    V(kSafepointTableOffsetOffset, kIntSize)                                \
40.    V(kHandlerTableOffsetOffset, kIntSize)                                  \
41.    V(kConstantPoolOffsetOffset,                                            \
42.      FLAG_enable_embedded_constant_pool ? kIntSize : 0)                    \
43.    V(kCodeCommentsOffsetOffset, kIntSize)                                  \
44.    V(kBuiltinIndexOffset, kIntSize)                                        \
45.    V(kUnalignedHeaderSize, 0)                                              \
46.    /* Add padding to align the instruction start following right after */  \
47.    /* the Code object header. */                                           \
48.    V(kOptionalPaddingOffset, CODE_POINTER_PADDING(kOptionalPaddingOffset)) \
49.    V(kHeaderSize, 0)
50.    DEFINE_FIELD_OFFSET_CONSTANTS(HeapObject::kHeaderSize, CODE_FIELDS)
  
51.  
  #undef CODE_FIELDS
52.    STATIC_ASSERT(FIELD_SIZE(kOptionalPaddingOffset) == kHeaderPaddingSize);
  
53.    
  inline 
  int 
  GetUnwindingInfoSizeOffset() 
  const;
  
54.    
  class 
  BodyDescriptor;
  
55.    
  // Flags layout.  BitField<type, shift, size>.
56.  
  #define CODE_FLAGS_BIT_FIELDS(V, _)    \
57.    V(HasUnwindingInfoField, bool, 1, _) \
58.    V(KindField, Kind, 5, _)             \
59.    V(IsTurbofannedField, bool, 1, _)    \
60.    V(StackSlotsField, int, 24, _)       \
61.    V(IsOffHeapTrampoline, bool, 1, _)
62.    DEFINE_BIT_FIELDS(CODE_FLAGS_BIT_FIELDS)
  
63.  
  #undef CODE_FLAGS_BIT_FIELDS
64.    
  static_assert(NUMBER_OF_KINDS <= KindField::kMax, 
  "Code::KindField size");
  
65.    
  static_assert(IsOffHeapTrampoline::kLastUsedBit < 
  32,
  
66.                  
  "Code::flags field exhausted");
  
67.    
  // KindSpecificFlags layout (STUB, BUILTIN and OPTIMIZED_FUNCTION)
68.  
  #define CODE_KIND_SPECIFIC_FLAGS_BIT_FIELDS(V, _) \
69.    V(MarkedForDeoptimizationField, bool, 1, _)     \
70.    V(EmbeddedObjectsClearedField, bool, 1, _)      \
71.    V(DeoptAlreadyCountedField, bool, 1, _)         \
72.    V(CanHaveWeakObjectsField, bool, 1, _)          \
73.    V(IsPromiseRejectionField, bool, 1, _)          \
74.    V(IsExceptionCaughtField, bool, 1, _)
75.    DEFINE_BIT_FIELDS(CODE_KIND_SPECIFIC_FLAGS_BIT_FIELDS)
  
76.  
  #undef CODE_KIND_SPECIFIC_FLAGS_BIT_FIELDS
77.   
  private:
  
78.    
  friend 
  class 
  RelocIterator;
  
79.    
  bool 
  is_promise_rejection() 
  const;
  
80.    
  bool 
  is_exception_caught() 
  const;
  
81.    OBJECT_CONSTRUCTORS(Code, HeapObject);  
  
82.  };
  
//........................代码太长,省略很多.....................
//.............................................................

上述代码中,CODE_KIND_LIST从code角度定义了类型,在Builtin类中也定义了Builtin一共有七种子类型,这是两种不同的定义方式,但说的都是Builtin。Builtin的初始化工作由方法void Isolate::Initialize(Isolate* isolate,const v8::Isolate::CreateParams& params)统一完成,下面给出这个方法的部分代码。

0.  
  void Isolate::Initialize(Isolate* isolate,
  
1.                           
  const v8::Isolate::CreateParams& params) {
  
2.    i::Isolate* i_isolate = 
  reinterpret_cast<i::Isolate*>(isolate);
  
3.    CHECK_NOT_NULL(params.array_buffer_allocator);
  
4.    i_isolate->set_array_buffer_allocator(params.array_buffer_allocator);
  
5.    
  if (params.snapshot_blob != 
  nullptr) {
  
6.      i_isolate->set_snapshot_blob(params.snapshot_blob);
  
7.    } 
  else {
  
8.      i_isolate->set_snapshot_blob(i::Snapshot::DefaultSnapshotBlob());
  
9.    }
  
10.    
  auto code_event_handler = params.code_event_handler;
  
11.  
  //........................代码太长,省略很多.....................
12.    
  if (!i::Snapshot::Initialize(i_isolate)) {
  
13.      
  // If snapshot data was provided and we failed to deserialize it must
14.      
  // have been corrupted.
15.      
  if (i_isolate->snapshot_blob() != 
  nullptr) {
  
16.        FATAL(
  
17.            
  "Failed to deserialize the V8 snapshot blob. This can mean that the "
18.            
  "snapshot blob file is corrupted or missing.");
  
19.      }
  
20.      base::ElapsedTimer timer;
  
21.      
  if (i::FLAG_profile_deserialization) timer.Start();
  
22.      i_isolate->InitWithoutSnapshot();
  
23.      
  if (i::FLAG_profile_deserialization) {
  
24.        
  double ms = timer.Elapsed().InMillisecondsF();
  
25.        i::PrintF(
  "[Initializing isolate from scratch took %0.3f ms]\n", ms);
  
26.      }
  
27.    }
  
28.    i_isolate->set_only_terminate_in_safe_scope(
  
29.        params.only_terminate_in_safe_scope);
  
30.  }
  

上述方法执行到第22行(i_isolate->InitWithoutSnapshot())后,最终进入下面的Builtin初始化方法。

1.  
  void SetupIsolateDelegate::SetupBuiltinsInternal(Isolate* isolate) {
  
2.  
  //...................删除部分代码,留下最核心功能
3.  
  //...................删除部分代码,留下最核心功能
4.    
  int index = 
  0;
  
5.    Code code;
  
6.  
  #define BUILD_CPP(Name)                                                      \
7.    code = BuildAdaptor(isolate, index, FUNCTION_ADDR(Builtin_##Name), #Name); \
8.    AddBuiltin(builtins, index++, code);
9.  
  #define BUILD_TFJ(Name, Argc, ...)                              \
10.   code = BuildWithCodeStubAssemblerJS(                          \
11.        isolate, index, &Builtins::Generate_##Name, Argc, #Name); \
12.    AddBuiltin(builtins, index++, code);
13.  
  #define BUILD_TFC(Name, InterfaceDescriptor)                      \
14.    /* Return size is from the provided CallInterfaceDescriptor. */ \
15.    code = BuildWithCodeStubAssemblerCS(                            \
16.        isolate, index, &Builtins::Generate_##Name,                 \
17.        CallDescriptors::InterfaceDescriptor, #Name);               \
18.    AddBuiltin(builtins, index++, code);
19.  
  #define BUILD_TFS(Name, ...)                                                   \
20.    /* Return size for generic TF builtins (stub linkage) is always 1. */        \
21.    code =                                                                       \
22.        BuildWithCodeStubAssemblerCS(isolate, index, &Builtins::Generate_##Name, \
23.                                     CallDescriptors::Name, #Name);              \
24.    AddBuiltin(builtins, index++, code);
25.  
  #define BUILD_TFH(Name, InterfaceDescriptor)              \
26.    /* Return size for IC builtins/handlers is always 1. */ \
27.    code = BuildWithCodeStubAssemblerCS(                    \
28.        isolate, index, &Builtins::Generate_##Name,         \
29.        CallDescriptors::InterfaceDescriptor, #Name);       \
30.    AddBuiltin(builtins, index++, code);
31.  
  #define BUILD_BCH(Name, OperandScale, Bytecode)                           \
32.    code = GenerateBytecodeHandler(isolate, index, OperandScale, Bytecode); \
33.    AddBuiltin(builtins, index++, code);
34.  
  #define BUILD_ASM(Name, InterfaceDescriptor)                                \
35.    code = BuildWithMacroAssembler(isolate, index, Builtins::Generate_##Name, \
36.                                   #Name);                                    \
37.    AddBuiltin(builtins, index++, code);
38.    BUILTIN_LIST(BUILD_CPP, BUILD_TFJ, BUILD_TFC, BUILD_TFS, BUILD_TFH, BUILD_BCH,
  
39.                 BUILD_ASM);
  
40.  
  //...................删除部分代码,留下最核心功能
41.  
  //...................删除部分代码,留下最核心功能
42.  }
  

上述代码只保留了最核心的Builtin初始化功能,初始化工作主要是生成并编译Builtin代码,并以独立功能的形式挂载到isolate上,下面以BuildWithCodeStubAssemblerCS()为例详细描述该过程。

见下面代码,第一个参数是isolate,用于保存初始化完成的Builtin;第二个参数builtin_index对应调用方SetupBuiltinsInternal()中的局部变量index,Builtin存储在isolate的数组成员中,index是数组下标;第三个参数generator是函数指针,该函数用于生成Builtin;第四个参数是call描述符;最后一个是函数名字。

1.  
  // Builder for builtins implemented in TurboFan with CallStub linkage.
2.  Code 
  BuildWithCodeStubAssemblerCS(Isolate* isolate, 
  int32_t builtin_index,
  
3.                                    CodeAssemblerGenerator generator,
  
4.                                    CallDescriptors::Key interface_descriptor,
  
5.                                    
  const 
  char* name) {
  
6.    HandleScope 
  scope(isolate);
  
7.    
  // Canonicalize handles, so that we can share constant pool entries pointing
8.    
  // to code targets without dereferencing their handles.
9.    CanonicalHandleScope 
  canonical(isolate);
  
10.   Zone 
  zone(isolate->allocator(), ZONE_NAME);
  
11.    
  // The interface descriptor with given key must be initialized at this point
12.    
  // and this construction just queries the details from the descriptors table.
13.    CallInterfaceDescriptor 
  descriptor(interface_descriptor);
  
14.    
  // Ensure descriptor is already initialized.
15.    DCHECK_LE(
  0, descriptor.GetRegisterParameterCount());
  
16.    compiler::CodeAssemblerState 
  state(
  
17.        isolate, &zone, descriptor, Code::BUILTIN, name,
  
18.        PoisoningMitigationLevel::kDontPoison, builtin_index);
  
19.    generator(&state);
  
20.    Handle<Code> code = compiler::CodeAssembler::GenerateCode(
  
21.        &state, BuiltinAssemblerOptions(isolate, builtin_index));
  
22.    
  return *code;
  
23.  }
  

在代码中,第19行代码生成Builtin源码,以第一个Builtin为例说明generator(&state)的功能,此时generator指针代表的函数是TF_BUILTIN(RecordWrite, RecordWriteCodeStubAssembler),下面是代码:

1.  TF_BUILTIN(RecordWrite, RecordWriteCodeStubAssembler) {
  
2.    Label 
  generational_wb(
  this);
  
3.    Label 
  incremental_wb(
  this);
  
4.    Label 
  exit(
  this);
  
5.    Node* remembered_set = Parameter(Descriptor::kRememberedSet);
  
6.    Branch(ShouldEmitRememberSet(remembered_set), &generational_wb,
  
7.           &incremental_wb);
  
8.    BIND(&generational_wb);
  
9.    {
  
10.     Label 
  test_old_to_young_flags(
  this);
  
11.      Label 
  store_buffer_exit(
  this), 
  store_buffer_incremental_wb(
  this);
  
12.      TNode<IntPtrT> slot = UncheckedCast<IntPtrT>(Parameter(Descriptor::kSlot));
  
13.      Branch(IsMarking(), &test_old_to_young_flags, &store_buffer_exit);
  
14.      BIND(&test_old_to_young_flags);
  
15.      {
  
16.        TNode<IntPtrT> value =
  
17.            BitcastTaggedToWord(Load(MachineType::TaggedPointer(), slot));
  
18.        TNode<BoolT> value_is_young =
  
19.            IsPageFlagSet(value, MemoryChunk::kIsInYoungGenerationMask);
  
20.        GotoIfNot(value_is_young, &incremental_wb);
  
21.        TNode<IntPtrT> object =
  
22.            BitcastTaggedToWord(Parameter(Descriptor::kObject));
  
23.        TNode<BoolT> object_is_young =
  
24.            IsPageFlagSet(object, MemoryChunk::kIsInYoungGenerationMask);
  
25.        Branch(object_is_young, &incremental_wb, &store_buffer_incremental_wb);
  
26.      }
  
27.      BIND(&store_buffer_exit);
  
28.      {
  
29.        TNode<ExternalReference> isolate_constant =
  
30.            ExternalConstant(ExternalReference::isolate_address(isolate()));
  
31.        Node* fp_mode = Parameter(Descriptor::kFPMode);
  
32.        InsertToStoreBufferAndGoto(isolate_constant, slot, fp_mode, &
  exit);
  
33.      }
  
34.      BIND(&store_buffer_incremental_wb);
  
35.      {
  
36.        TNode<ExternalReference> isolate_constant =
  
37.            ExternalConstant(ExternalReference::isolate_address(isolate()));
  
38.        Node* fp_mode = Parameter(Descriptor::kFPMode);
  
39.        InsertToStoreBufferAndGoto(isolate_constant, slot, fp_mode,
  
40.                                   &incremental_wb);
  
41.      }
  
42.    } 
  //........................省略代码......................................
43.    BIND(&
  exit);
  
44.    IncrementCounter(isolate()->counters()->write_barriers(), 
  1);
  
45.    Return(TrueConstant());
  
46.  }
  

这个函数TF_BUILTIN(RecordWrite, RecordWriteCodeStubAssembler)是生成器,它的作用是生成写记录功能的源代码,TF_BUILTIN是宏模板,展开后可以看到它的类成员CodeAssemblerState* state保存了生成之后的源码。“用平台无关的生成器为特定平台生成源代码”是Builtin的常用做法,这样减少了工作量。函数执行完成后返回到BuildWithCodeStubAssemblerCS,生成的源代码经过处理后,最终由code表示,下面是code的数据类型。

class 
  Code : 
  public HeapObject {
  
 
  public:
  
  NEVER_READ_ONLY_SPACE
  
  
  // Opaque data type for encapsulating code flags like kind, inline
  
  // cache state, and arguments count.
  
  using Flags = 
  uint32_t;
  
#define CODE_KIND_LIST(V)   \
  V(OPTIMIZED_FUNCTION)     \
  V(BYTECODE_HANDLER)       \
  V(STUB)                   \
  V(BUILTIN)                \
  V(REGEXP)                 \
  V(WASM_FUNCTION)          \
  V(WASM_TO_CAPI_FUNCTION)  \
  V(WASM_TO_JS_FUNCTION)    \
  V(JS_TO_WASM_FUNCTION)    \
  V(JS_TO_JS_FUNCTION)      \
  V(WASM_INTERPRETER_ENTRY) \
  V(C_WASM_ENTRY)
  
  enum Kind {
  
#define DEFINE_CODE_KIND_ENUM(name) name,
    CODE_KIND_LIST(DEFINE_CODE_KIND_ENUM)
  
#undef DEFINE_CODE_KIND_ENUM
        NUMBER_OF_KINDS
  
  };
  
//..................省略........................
//.............................................

上述代码中,可以看到从code的角度对Builtin进行了更详细的分类。另外Code继承自HeapObject,是堆对象,也就是说Builtin是由V8的堆(heap)进行管理,后续讲到堆时再详细说明这部分知识。图2给出函数调用堆栈,供读者自行复现。

在SetupBuiltinsInternal()中可以看到AddBuiltin()将生成的code代码添加到isolate中,代码如下。

void SetupIsolateDelegate::AddBuiltin(Builtins* builtins, 
  int index,
  
                                      Code code) {
  
  DCHECK_EQ(index, code.builtin_index());
  
  builtins->set_builtin(index, code);
  
}
  
//..............分隔线......................
void Builtins::set_builtin(
  int index, Code builtin) {
  
  isolate_->heap()->set_builtin(index, builtin);
  
}
  

所有Builtin功能生成后保存在Address builtins_[Builtins::builtin_count]中,初始化方法SetupBuiltinsInternal按照BUILTIN_LIST的定义顺序依次完成所有Builtin的源码生成、编译,并挂载到isolate上。

3 Builtin子类型

从Builtins的功能看,它包括了:Ignition实现、字节码实现、以及ECMA规范实现等众多V8的核心功能,在BUILTIN_LIST定义中有详细注释,请读者自行查阅。前面讲过,从BUILTIN的实现角度分为七种类型,见下面代码:

#define BUILD_CPP(Name)
#define BUILD_TFJ(Name, Argc, ...) 
#define BUILD_TFC(Name, InterfaceDescriptor)  
#define BUILD_TFS(Name, ...) 
#define BUILD_TFH(Name, InterfaceDescriptor) 
#define BUILD_BCH(Name, OperandScale, Bytecode)  
#define BUILD_ASM(Name, InterfaceDescriptor)

以子类型BUILD_CPP举例说明,下面是完整源代码。

1.  Code 
  BuildAdaptor(Isolate* isolate, 
  int32_t builtin_index,
  
2.                    Address builtin_address, 
  const 
  char* name) {
  
3.    HandleScope 
  scope(isolate);
  
4.    
  // Canonicalize handles, so that we can share constant pool entries pointing
5.    
  // to code targets without dereferencing their handles.
6.    CanonicalHandleScope 
  canonical(isolate);
  
7.    
  constexpr 
  int kBufferSize = 
  32 * KB;
  
8.    byte buffer[kBufferSize];
  
9.    MacroAssembler 
  masm(isolate, BuiltinAssemblerOptions(isolate, builtin_index),
  
10.                       CodeObjectRequired::kYes,
  
11.                        
  ExternalAssemblerBuffer(buffer, kBufferSize));
  
12.    masm.set_builtin_index(builtin_index);
  
13.    DCHECK(!masm.has_frame());
  
14.    Builtins::Generate_Adaptor(&masm, builtin_address);
  
15.    CodeDesc desc;
  
16.    masm.GetCode(isolate, &desc);
  
17.    Handle<Code> code = Factory::CodeBuilder(isolate, desc, Code::BUILTIN)
  
18.                            .set_self_reference(masm.CodeObject())
  
19.                            .set_builtin_index(builtin_index)
  
20.                            .Build();
  
21.    
  return *code;
  
22.  }
  

BuildAdaptor的生成功能由第14行代码(Builtins::Generate_Adaptor)实现,最终该代码的实现如下:

void Builtins::Generate_Adaptor(MacroAssembler* masm, Address address) {
  
  __ LoadAddress(kJavaScriptCallExtraArg1Register,
  
                 ExternalReference::Create(address));
  
  __ Jump(BUILTIN_CODE(masm->isolate(), AdaptorWithBuiltinExitFrame),
  
          RelocInfo::CODE_TARGET);
  
}
  
}
  

上面两部分代码实现了第77号Builtin功能,名字是HandleApiCall,图2以char类型展示了生成的源代码。

总结:学习Builtin时,涉及很多系统结构相关的知识,本文讲解采用的是x64架构。每种Builtin的生成方式虽不相同,但分析源码的思路相同,有问题可以联系我。

好了,今天到这里,下次见。

恳请读者批评指正、提出宝贵意见

微信:qq9123013 备注:v8交流 邮箱:v8blink@outlook.com

这个过程中借鉴了很多师傅的经验,受益匪浅,也少走了很多弯路。


功能分析源码
本作品采用《CC 协议》,转载必须注明作者和本文链接
因是内部系统,所以打码稍微严重些。我看了登录框,没验证码,首先想到的肯定是对admin账号先爆破一波。.那接下来就一切顺利了,修改了admin密码,并成功登陆,看到了所有分公司的数据。。。。。。。设计者这就应该挨板子了,再然后才有越权访问admin信息,再有原密码的绕过,这一系列小漏洞单独来说不严重,但一旦结合起来,就出现了很严重的安全事件,在进行安全防护工作时,应尽量考虑周全,任何一个弱点都可能成为黑客的突破口。
2021安洵杯PWN WP详解
2021-12-29 16:41:08
做了2021安洵杯线上赛题目,总体来说题目有简单有难的,难易程度合适,这次就做了pwn,把四道pwn题思路总结一下,重点是没几个人做出来的最后一道pwnsky,赛后做了复现。
那么tblive就会组装消息通过子父进程通讯给钉钉主进程, 由主进程和服务器进行通讯完成该功能。后续通过折半查找法,定位到了钉钉的消息队列处理线程。所以这个时候取消malloc的断点开始向上回溯,停到疑似创建成员指针的位置后下断重新变动台下成员,经过验证这个call则为创建成员指针。注意劫持的dll是air2.dll,将原dll名称改为air2Org.dll即可。
勒索病毒的深度分析
吃鸡辅助外挂的逆向实践
背景在某社群中看到有用户网社群共享盘中上传一个名称为协议微信加好友的应用软件,并且在社群中宣称可以无限加好
未知攻,焉知防?随着基于威胁情报的安全产品/服务取得长足的进步和巨大的成功,威胁情报相关的应用价值效果显著,导致了攻击者的相关C2等很容易迅速遭到封杀,攻击者不断的谋求新出路,使用域前置、域隐藏、域借用等隐藏真实C2的攻击手法也越来越多。
随着互联网技术的迅速发展,网络已成为人们生活、娱乐和工作不可或缺的一部分。随之而来的安全问题,使得网络安全成为各国信息安全领域研究的热点方向之一。因此,提出一种动态安全防护框架,采用动态防护模型、安全防护功能动态重组、安全防护服务集成和安全防护载荷的架构模式,通过对终端安全防护功能、网络安全防护功能、云安全防护功能的载荷化,面对网络攻击“以变制变”,构筑强对抗环境下的有效安全防护能力。
VSole
网络安全专家