用 Babel 实现一个 JS 解释器
v8的流程
-
parser 负责把源码 parse 成 AST。
-
Ignition 解释器负责把 AST 转成字节码,然后解释执行
-
turbofan 可以把代码编译成机器码,直接执行
-
gc 负责堆内存的垃圾回收
最早的 V8 是没有字节码的,parser 生成的 AST 直接交给解释器执行,我们这一节就是要来实现这种直接解释 AST 的 JS 解释器。
思路分析
const a = 1 + 2 + 3;
上面代码对应的AST是
AST的json格式,点击查看
{
"type": "File",
"start": 0,
"end": 20,
"loc": {
"start": {
"line": 1,
"column": 0,
"index": 0
},
"end": {
"line": 1,
"column": 20,
"index": 20
}
},
"errors": [],
"program": {
"type": "Program",
"start": 0,
"end": 20,
"loc": {
"start": {
"line": 1,
"column": 0,
"index": 0
},
"end": {
"line": 1,
"column": 20,
"index": 20
}
},
"sourceType": "module",
"interpreter": null,
"body": [
{
"type": "VariableDeclaration",
"start": 0,
"end": 20,
"loc": {
"start": {
"line": 1,
"column": 0,
"index": 0
},
"end": {
"line": 1,
"column": 20,
"index": 20
}
},
"declarations": [
{
"type": "VariableDeclarator",
"start": 6,
"end": 19,
"loc": {
"start": {
"line": 1,
"column": 6,
"index": 6
},
"end": {
"line": 1,
"column": 19,
"index": 19
}
},
"id": {
"type": "Identifier",
"start": 6,
"end": 7,
"loc": {
"start": {
"line": 1,
"column": 6,
"index": 6
},
"end": {
"line": 1,
"column": 7,
"index": 7
},
"identifierName": "a"
},
"name": "a"
},
"init": {
"type": "BinaryExpression",
"start": 10,
"end": 19,
"loc": {
"start": {
"line": 1,
"column": 10,
"index": 10
},
"end": {
"line": 1,
"column": 19,
"index": 19
}
},
"left": {
"type": "BinaryExpression",
"start": 10,
"end": 15,
"loc": {
"start": {
"line": 1,
"column": 10,
"index": 10
},
"end": {
"line": 1,
"column": 15,
"index": 15
}
},
"left": {
"type": "NumericLiteral",
"start": 10,
"end": 11,
"loc": {
"start": {
"line": 1,
"column": 10,
"index": 10
},
"end": {
"line": 1,
"column": 11,
"index": 11
}
},
"extra": {
"rawValue": 1,
"raw": "1"
},
"value": 1
},
"operator": "+",
"right": {
"type": "NumericLiteral",
"start": 14,
"end": 15,
"loc": {
"start": {
"line": 1,
"column": 14,
"index": 14
},
"end": {
"line": 1,
"column": 15,
"index": 15
}
},
"extra": {
"rawValue": 2,
"raw": "2"
},
"value": 2
}
},
"operator": "+",
"right": {
"type": "NumericLiteral",
"start": 18,
"end": 19,
"loc": {
"start": {
"line": 1,
"column": 18,
"index": 18
},
"end": {
"line": 1,
"column": 19,
"index": 19
}
},
"extra": {
"rawValue": 3,
"raw": "3"
},
"value": 3
}
}
}
],
"kind": "const"
}
],
"directives": []
},
"comments": []
}
下面是解析变量赋值的完整代码,也可以在 GitHub 上查看完整代码
const parser = require('@babel/parser');
const { codeFrameColumns } = require('@babel/code-frame');
// Program text fed to the interpreter. The evaluator also passes this string to
// codeFrameColumns when it reports an error, so it must stay the exact text
// that was parsed.
const sourceCode = `
const a = 1 + 2;
`;

// Parse once up front. Per the @babel/parser docs, sourceType 'unambiguous'
// lets the parser choose between script and module mode by itself.
const ast = parser.parse(sourceCode, { sourceType: 'unambiguous' });
/**
 * Minimal tree-walking interpreter for a Babel AST.
 *
 * Supports variable declarations, expression statements, numeric literals and
 * the binary operators + - * /. The scope is a plain object whose own
 * properties are the declared variables.
 *
 * Errors raised while interpreting a node are reported through console.error
 * (with a code frame when the node carries location data) and the node then
 * evaluates to undefined, so one bad statement does not abort the program.
 *
 * @returns {{ evaluate: (node: object, scope: object) => * }}
 */
const evaluator = (function () {
  const hasOwn = (object, key) => Object.prototype.hasOwnProperty.call(object, key);

  // Prints a message followed by the offending source excerpt. Hand-built
  // nodes may lack `loc`, in which case only the message is printed.
  const reportAt = (node, message) => {
    console.error(message);
    if (node && node.loc) {
      console.error(codeFrameColumns(sourceCode, node.loc, {
        highlightCode: true
      }));
    }
  };

  // The Identifier handler yields the *name* (declarations need it), so
  // anything used as a value goes through this helper to read the binding.
  const resolveValue = (node, scope) => {
    if (node.type !== 'Identifier') {
      return evaluate(node, scope);
    }
    if (!hasOwn(scope, node.name)) {
      throw new Error(node.name + ' is not defined');
    }
    return scope[node.name];
  };

  const astInterpreters = {
    Program(node, scope) {
      node.body.forEach((statement) => {
        evaluate(statement, scope);
      });
    },
    VariableDeclaration(node, scope) {
      node.declarations.forEach((declarator) => {
        evaluate(declarator, scope);
      });
    },
    VariableDeclarator(node, scope) {
      const declareName = evaluate(node.id, scope);
      // Own-property test instead of truthiness: a variable holding 0 or ''
      // is still declared, and inherited names such as `toString` are not.
      if (hasOwn(scope, declareName)) {
        throw new Error('duplicate declare variable:' + declareName);
      }
      // A declaration without an initializer (init === null) binds undefined.
      scope[declareName] = node.init ? resolveValue(node.init, scope) : undefined;
    },
    ExpressionStatement(node, scope) {
      return evaluate(node.expression, scope);
    },
    BinaryExpression(node, scope) {
      const leftValue = resolveValue(node.left, scope);
      const rightValue = resolveValue(node.right, scope);
      switch (node.operator) {
        case '+':
          return leftValue + rightValue;
        case '-':
          return leftValue - rightValue;
        case '*':
          return leftValue * rightValue;
        case '/':
          return leftValue / rightValue;
        default:
          throw new Error('unsupported operator:' + node.operator);
      }
    },
    Identifier(node) {
      return node.name;
    },
    NumericLiteral(node) {
      return node.value;
    }
  };

  /**
   * Interprets one AST node.
   *
   * @param {object} node  Babel AST node; dispatched on `node.type`.
   * @param {object} scope Plain object holding the declared variables.
   * @returns {*} The handler's result, or undefined after a reported error.
   */
  const evaluate = (node, scope) => {
    try {
      // Explicit lookup rather than matching the engine's TypeError text,
      // which differs between JavaScript engines and versions.
      if (!hasOwn(astInterpreters, node.type)) {
        reportAt(node, 'unsupported ast type: ' + node.type);
        return undefined;
      }
      return astInterpreters[node.type](node, scope);
    } catch (e) {
      reportAt(node, e instanceof Error ? e.message : String(e));
      return undefined;
    }
  };

  return {
    evaluate
  };
})();
// Execute the parsed program against an empty object that serves as the global
// scope, then print that object so the resulting variable bindings are visible.
const globalScope = {};
evaluator.evaluate(ast.program, globalScope);
console.log(globalScope);
我们改进一下,让它也能解析函数。下面是解析变量赋值和函数的完整代码,也可以在 GitHub 上查看完整代码
const parser = require('@babel/parser');
const { codeFrameColumns } = require('@babel/code-frame');
const chalk = require('chalk');
// Program text fed to the interpreter: a variable, a function declaration and
// a call whose result is printed. The evaluator also passes this string to
// codeFrameColumns when it reports an error.
const sourceCode = `
const a = 2;
function add(a, b) {
return a + b;
}
console.log(add(1, 2));
`;

// Parse once up front. Per the @babel/parser docs, sourceType 'unambiguous'
// lets the parser choose between script and module mode by itself.
const ast = parser.parse(sourceCode, { sourceType: 'unambiguous' });
/**
 * One level of the interpreter's lexical scope chain.
 *
 * Bindings live in `declarations`; lookups that miss locally continue in
 * `parent` (if any).
 */
class Scope {
  /**
   * @param {Scope} [parentScope] Enclosing scope, or undefined for the root.
   */
  constructor(parentScope) {
    this.parent = parentScope;
    // Prototype-less dictionary: with an Array (or a plain object) names such
    // as `length`, `push` or `toString` would look like existing bindings.
    this.declarations = Object.create(null);
  }

  /**
   * Creates or overwrites a binding in this scope.
   * @param {string} name
   * @param {*} value
   */
  set(name, value) {
    this.declarations[name] = value;
  }

  /**
   * Reads a binding from this scope only.
   * @param {string} name
   * @returns {*} The bound value, or undefined when `name` is not bound here.
   */
  getLocal(name) {
    return this.declarations[name];
  }

  /**
   * Reads a binding, searching this scope first and then its ancestors.
   * @param {string} name
   * @returns {*} The bound value, or undefined when no scope binds `name`.
   */
  get(name) {
    // Decide by existence, not by value: a local binding that holds
    // undefined must still shadow a binding of the same name further out.
    if (this.has(name)) {
      return this.getLocal(name);
    }
    return this.parent ? this.parent.get(name) : undefined;
  }

  /**
   * Tells whether this scope itself (not an ancestor) binds `name`.
   * Bindings holding falsy values (0, '', undefined, ...) count as present.
   * @param {string} name
   * @returns {boolean}
   */
  has(name) {
    return name in this.declarations;
  }
}
/**
 * Produces the runtime value of an operand node.
 *
 * Identifier nodes are looked up in the scope chain; every other node type is
 * handed to the interpreter.
 *
 * @param {object} node  Babel AST node.
 * @param {Scope}  scope Scope used for the lookup / evaluation.
 * @returns {*} The value bound to the identifier, or the evaluated result.
 */
function getIdentifierValue(node, scope) {
  if (node.type === 'Identifier') {
    return scope.get(node.name);
  }
  // `evaluate` is private to the evaluator IIFE and not visible here; the
  // public entry point has to be used, otherwise this line throws a
  // ReferenceError for every non-identifier operand.
  return evaluator.evaluate(node, scope);
}
/**
 * Tree-walking interpreter for a Babel AST, with function support.
 *
 * Handles variable and function declarations, calls (plain and member),
 * member access, return statements, numeric literals and the binary
 * operators + - * /. Variables live in `Scope` objects; every function call
 * runs in a fresh Scope whose parent is the scope the function was declared in.
 *
 * Errors raised while interpreting a node are reported through console.error
 * (with a code frame when the node carries location data) and the node then
 * evaluates to undefined, so one bad statement does not abort the program.
 *
 * @returns {{ evaluate: (node: object, scope: Scope) => * }}
 */
const evaluator = (function () {
  const hasOwn = (object, key) => Object.prototype.hasOwnProperty.call(object, key);

  // Prints the message parts followed by the offending source excerpt.
  // Hand-built nodes may lack `loc`, in which case only the message is printed.
  const reportAt = (node, ...messageParts) => {
    console.error(...messageParts);
    if (node && node.loc) {
      console.error(codeFrameColumns(sourceCode, node.loc, {
        highlightCode: true
      }));
    }
  };

  // The Identifier handler yields the *name* (declarations need it), so
  // anything used as a value goes through this helper to read the binding.
  const resolveValue = (node, scope) => (
    node.type === 'Identifier' ? scope.get(node.name) : evaluate(node, scope)
  );

  // Only the current scope is checked: an inner scope may legally shadow a
  // name that an outer scope already declares.
  const assertNotDeclared = (scope, name) => {
    if (scope.has(name)) {
      throw new Error('duplicate declare variable:' + name);
    }
  };

  // Property key of a MemberExpression: `obj[expr]` uses the value of expr,
  // `obj.name` uses the identifier's name.
  const memberKey = (node, scope) => (
    node.computed ? resolveValue(node.property, scope) : evaluate(node.property, scope)
  );

  const astInterpreters = {
    Program(node, scope) {
      node.body.forEach((statement) => {
        evaluate(statement, scope);
      });
    },
    VariableDeclaration(node, scope) {
      node.declarations.forEach((declarator) => {
        evaluate(declarator, scope);
      });
    },
    VariableDeclarator(node, scope) {
      const declareName = evaluate(node.id, scope);
      assertNotDeclared(scope, declareName);
      // A declaration without an initializer (init === null) binds undefined.
      scope.set(declareName, node.init ? resolveValue(node.init, scope) : undefined);
    },
    ExpressionStatement(node, scope) {
      return evaluate(node.expression, scope);
    },
    MemberExpression(node, scope) {
      const target = resolveValue(node.object, scope);
      return target[memberKey(node, scope)];
    },
    FunctionDeclaration(node, scope) {
      const declareName = evaluate(node.id, scope);
      assertNotDeclared(scope, declareName);
      // A regular function (not an arrow) so the host-provided `this` of each
      // call can be exposed to the interpreted body.
      scope.set(declareName, function (...args) {
        const funcScope = new Scope(scope);
        node.params.forEach((param, index) => {
          funcScope.set(param.name, args[index]);
        });
        funcScope.set('this', this);
        return evaluate(node.body, funcScope);
      });
    },
    ReturnStatement(node, scope) {
      // A bare `return;` has a null argument and yields undefined.
      return node.argument ? resolveValue(node.argument, scope) : undefined;
    },
    BlockStatement(node, scope) {
      for (const statement of node.body) {
        const result = evaluate(statement, scope);
        // Statements after a top-level return are never executed.
        if (statement.type === 'ReturnStatement') {
          return result;
        }
      }
      return undefined;
    },
    CallExpression(node, scope) {
      const args = node.arguments.map((argument) => resolveValue(argument, scope));
      const { callee } = node;
      let thisArg = null;
      let fn;
      if (callee.type === 'MemberExpression') {
        // The receiver must be the object itself, not its identifier name,
        // or methods that use `this` break.
        thisArg = resolveValue(callee.object, scope);
        fn = thisArg[memberKey(callee, scope)];
      } else {
        fn = resolveValue(callee, scope);
      }
      if (typeof fn !== 'function') {
        throw new Error('callee is not a function');
      }
      return fn.apply(thisArg, args);
    },
    BinaryExpression(node, scope) {
      const leftValue = resolveValue(node.left, scope);
      const rightValue = resolveValue(node.right, scope);
      switch (node.operator) {
        case '+':
          return leftValue + rightValue;
        case '-':
          return leftValue - rightValue;
        case '*':
          return leftValue * rightValue;
        case '/':
          return leftValue / rightValue;
        default:
          throw new Error('unsupported operator:' + node.operator);
      }
    },
    Identifier(node) {
      return node.name;
    },
    NumericLiteral(node) {
      return node.value;
    }
  };

  /**
   * Interprets one AST node.
   *
   * @param {object} node  Babel AST node; dispatched on `node.type`.
   * @param {Scope}  scope Scope the node is evaluated in.
   * @returns {*} The handler's result, or undefined after a reported error.
   */
  const evaluate = (node, scope) => {
    try {
      // Explicit lookup rather than matching the engine's TypeError text,
      // which differs between JavaScript engines and versions.
      if (!hasOwn(astInterpreters, node.type)) {
        reportAt(node, 'unsupported ast type: ' + node.type);
        return undefined;
      }
      return astInterpreters[node.type](node, scope);
    } catch (e) {
      reportAt(node, (node && node.type) + ':', e instanceof Error ? e.message : String(e));
      return undefined;
    }
  };

  return {
    evaluate
  };
})();
// Global scope of the interpreted program. The only built-in it receives is a
// `console` object whose methods print through the host console, colored with
// chalk so interpreted output is easy to tell apart.
const globalScope = new Scope();
globalScope.set('console', {
  log(...args) {
    console.log(chalk.green(...args));
  },
  error(...args) {
    console.log(chalk.red(...args));
  },
  // Distinct key so it no longer overrides `error`; yellow is used because
  // chalk has no `orange` style (calling it would throw a TypeError).
  warn(...args) {
    console.log(chalk.yellow(...args));
  },
});

// Run the parsed program with that scope as its global environment.
evaluator.evaluate(ast.program, globalScope);