python3词法分析(一)词法单元
一、词法单元
词法单元(token):由词法单元名和可选的属性值组成,即"词法单元名 + 可选属性值"。
所有的 token 都定义在 Grammar/Tokens 文件中,内容如下:
ENDMARKER
NAME
NUMBER
STRING
NEWLINE
INDENT
DEDENT
LPAR '('
RPAR ')'
LSQB '['
RSQB ']'
COLON ':'
COMMA ','
SEMI ';'
PLUS '+'
MINUS '-'
STAR '*'
SLASH '/'
VBAR '|'
AMPER '&'
LESS '<'
GREATER '>'
EQUAL '='
DOT '.'
PERCENT '%'
LBRACE '{'
RBRACE '}'
EQEQUAL '=='
NOTEQUAL '!='
LESSEQUAL '<='
GREATEREQUAL '>='
TILDE '~'
CIRCUMFLEX '^'
LEFTSHIFT '<<'
RIGHTSHIFT '>>'
DOUBLESTAR '**'
PLUSEQUAL '+='
MINEQUAL '-='
STAREQUAL '*='
SLASHEQUAL '/='
PERCENTEQUAL '%='
AMPEREQUAL '&='
VBAREQUAL '|='
CIRCUMFLEXEQUAL '^='
LEFTSHIFTEQUAL '<<='
RIGHTSHIFTEQUAL '>>='
DOUBLESTAREQUAL '**='
DOUBLESLASH '//'
DOUBLESLASHEQUAL '//='
AT '@'
ATEQUAL '@='
RARROW '->'
ELLIPSIS '...'
COLONEQUAL ':='
OP
AWAIT
ASYNC
TYPE_IGNORE
TYPE_COMMENT
SOFT_KEYWORD
ERRORTOKEN
# These aren't used by the C tokenizer but are needed for tokenize.py
COMMENT
NL
ENCODING
二、自动生成代码
执行 ./configure 时,会以 Makefile.pre.in 为模板生成 Makefile。
Makefile.pre.in 中包含如下规则:
# regen-token: regenerate every token-related file from Grammar/Tokens by
# running Tools/scripts/generate_token.py once per output format
# (rst, h, c, py). Each recipe line must start with a TAB character.
.PHONY: regen-token
regen-token:
	# Regenerate Doc/library/token-list.inc from Grammar/Tokens
	# using Tools/scripts/generate_token.py
	$(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_token.py rst \
		$(srcdir)/Grammar/Tokens \
		$(srcdir)/Doc/library/token-list.inc
	# Regenerate Include/token.h from Grammar/Tokens
	# using Tools/scripts/generate_token.py
	$(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_token.py h \
		$(srcdir)/Grammar/Tokens \
		$(srcdir)/Include/token.h
	# Regenerate Parser/token.c from Grammar/Tokens
	# using Tools/scripts/generate_token.py
	$(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_token.py c \
		$(srcdir)/Grammar/Tokens \
		$(srcdir)/Parser/token.c
	# Regenerate Lib/token.py from Grammar/Tokens
	# using Tools/scripts/generate_token.py
	$(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_token.py py \
		$(srcdir)/Grammar/Tokens \
		$(srcdir)/Lib/token.py
执行 make regen-token 时,该规则会调用 Tools/scripts/generate_token.py,根据 Grammar/Tokens 生成 token 相关的文档和代码(token-list.inc、token.h、token.c、token.py)。
2.1 Include/token.h
/* Auto-generated by Tools/scripts/generate_token.py *//* Token types */
#ifndef Py_LIMITED_API
#ifndef Py_TOKEN_H
#define Py_TOKEN_H
#ifdef __cplusplus
extern "C" {#endif#undef TILDE /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */#define ENDMARKER 0
#define NAME 1
#define NUMBER 2
#define STRING 3
#define NEWLINE 4
#define INDENT 5
#define DEDENT 6
#define LPAR 7
#define RPAR 8
#define LSQB 9
#define RSQB 10
#define COLON 11
#define COMMA 12
#define SEMI 13
#define PLUS 14
#define MINUS 15
#define STAR 16
#define SLASH 17
#define VBAR 18
#define AMPER 19
#define LESS 20
#define GREATER 21
#define EQUAL 22
#define DOT 23
#define PERCENT 24
#define LBRACE 25
#define RBRACE 26
#define EQEQUAL 27
#define NOTEQUAL 28
#define LESSEQUAL 29
#define GREATEREQUAL 30
#define TILDE 31
#define CIRCUMFLEX 32
#define LEFTSHIFT 33
#define RIGHTSHIFT 34
#define DOUBLESTAR 35
#define PLUSEQUAL 36
#define MINEQUAL 37
#define STAREQUAL 38
#define SLASHEQUAL 39
#define PERCENTEQUAL 40
#define AMPEREQUAL 41
#define VBAREQUAL 42
#define CIRCUMFLEXEQUAL 43
#define LEFTSHIFTEQUAL 44
#define RIGHTSHIFTEQUAL 45
#define DOUBLESTAREQUAL 46
#define DOUBLESLASH 47
#define DOUBLESLASHEQUAL 48
#define AT 49
#define ATEQUAL 50
#define RARROW 51
#define ELLIPSIS 52
#define COLONEQUAL 53
#define OP 54
#define AWAIT 55
#define ASYNC 56
#define TYPE_IGNORE 57
#define TYPE_COMMENT 58
#define SOFT_KEYWORD 59
#define ERRORTOKEN 60
#define N_TOKENS 64
#define NT_OFFSET 256/* Special definitions for cooperation with parser */#define ISTERMINAL(x) ((x) < NT_OFFSET)
#define ISNONTERMINAL(x) ((x) >= NT_OFFSET)
#define ISEOF(x) ((x) == ENDMARKER)
#define ISWHITESPACE(x) ((x) == ENDMARKER || \(x) == NEWLINE || \(x) == INDENT || \(x) == DEDENT)PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
PyAPI_FUNC(int) PyToken_OneChar(int);
PyAPI_FUNC(int) PyToken_TwoChars(int, int);
PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int);#ifdef __cplusplus
}
#endif
#endif /* !Py_TOKEN_H */
#endif /* Py_LIMITED_API */
2.2 Parser/token.c
/* Auto-generated by Tools/scripts/generate_token.py */

#include "Python.h"
#include "token.h"

/* Token names */

/* Printable name of every token, indexed by the token's numeric identifier.
   Names wrapped in angle brackets are placeholder entries. */
const char * const _PyParser_TokenNames[] = {
    "ENDMARKER",
    "NAME",
    "NUMBER",
    "STRING",
    "NEWLINE",
    "INDENT",
    "DEDENT",
    "LPAR",
    "RPAR",
    "LSQB",
    "RSQB",
    "COLON",
    "COMMA",
    "SEMI",
    "PLUS",
    "MINUS",
    "STAR",
    "SLASH",
    "VBAR",
    "AMPER",
    "LESS",
    "GREATER",
    "EQUAL",
    "DOT",
    "PERCENT",
    "LBRACE",
    "RBRACE",
    "EQEQUAL",
    "NOTEQUAL",
    "LESSEQUAL",
    "GREATEREQUAL",
    "TILDE",
    "CIRCUMFLEX",
    "LEFTSHIFT",
    "RIGHTSHIFT",
    "DOUBLESTAR",
    "PLUSEQUAL",
    "MINEQUAL",
    "STAREQUAL",
    "SLASHEQUAL",
    "PERCENTEQUAL",
    "AMPEREQUAL",
    "VBAREQUAL",
    "CIRCUMFLEXEQUAL",
    "LEFTSHIFTEQUAL",
    "RIGHTSHIFTEQUAL",
    "DOUBLESTAREQUAL",
    "DOUBLESLASH",
    "DOUBLESLASHEQUAL",
    "AT",
    "ATEQUAL",
    "RARROW",
    "ELLIPSIS",
    "COLONEQUAL",
    "OP",
    "AWAIT",
    "ASYNC",
    "TYPE_IGNORE",
    "TYPE_COMMENT",
    "SOFT_KEYWORD",
    "<ERRORTOKEN>",
    "<COMMENT>",
    "<NL>",
    "<ENCODING>",
    "<N_TOKENS>",
};

/* Return the token corresponding to a single character */

/* c1 is the character code to classify. Returns the matching token
   identifier, or the generic OP when c1 is not one of the listed characters. */
int
PyToken_OneChar(int c1)
{
    switch (c1) {
    case '%': return PERCENT;
    case '&': return AMPER;
    case '(': return LPAR;
    case ')': return RPAR;
    case '*': return STAR;
    case '+': return PLUS;
    case ',': return COMMA;
    case '-': return MINUS;
    case '.': return DOT;
    case '/': return SLASH;
    case ':': return COLON;
    case ';': return SEMI;
    case '<': return LESS;
    case '=': return EQUAL;
    case '>': return GREATER;
    case '@': return AT;
    case '[': return LSQB;
    case ']': return RSQB;
    case '^': return CIRCUMFLEX;
    case '{': return LBRACE;
    case '|': return VBAR;
    case '}': return RBRACE;
    case '~': return TILDE;
    }
    return OP;
}

/* Return the token corresponding to the two-character sequence c1 c2,
   or OP when the pair is not a listed operator. Note that both "!=" and
   "<>" map to NOTEQUAL. */
int
PyToken_TwoChars(int c1, int c2)
{
    switch (c1) {
    case '!':
        switch (c2) {
        case '=': return NOTEQUAL;
        }
        break;
    case '%':
        switch (c2) {
        case '=': return PERCENTEQUAL;
        }
        break;
    case '&':
        switch (c2) {
        case '=': return AMPEREQUAL;
        }
        break;
    case '*':
        switch (c2) {
        case '*': return DOUBLESTAR;
        case '=': return STAREQUAL;
        }
        break;
    case '+':
        switch (c2) {
        case '=': return PLUSEQUAL;
        }
        break;
    case '-':
        switch (c2) {
        case '=': return MINEQUAL;
        case '>': return RARROW;
        }
        break;
    case '/':
        switch (c2) {
        case '/': return DOUBLESLASH;
        case '=': return SLASHEQUAL;
        }
        break;
    case ':':
        switch (c2) {
        case '=': return COLONEQUAL;
        }
        break;
    case '<':
        switch (c2) {
        case '<': return LEFTSHIFT;
        case '=': return LESSEQUAL;
        case '>': return NOTEQUAL;
        }
        break;
    case '=':
        switch (c2) {
        case '=': return EQEQUAL;
        }
        break;
    case '>':
        switch (c2) {
        case '=': return GREATEREQUAL;
        case '>': return RIGHTSHIFT;
        }
        break;
    case '@':
        switch (c2) {
        case '=': return ATEQUAL;
        }
        break;
    case '^':
        switch (c2) {
        case '=': return CIRCUMFLEXEQUAL;
        }
        break;
    case '|':
        switch (c2) {
        case '=': return VBAREQUAL;
        }
        break;
    }
    return OP;
}

/* Return the token corresponding to the three-character sequence c1 c2 c3,
   or OP when the triple is not a listed operator. */
int
PyToken_ThreeChars(int c1, int c2, int c3)
{
    switch (c1) {
    case '*':
        switch (c2) {
        case '*':
            switch (c3) {
            case '=': return DOUBLESTAREQUAL;
            }
            break;
        }
        break;
    case '.':
        switch (c2) {
        case '.':
            switch (c3) {
            case '.': return ELLIPSIS;
            }
            break;
        }
        break;
    case '/':
        switch (c2) {
        case '/':
            switch (c3) {
            case '=': return DOUBLESLASHEQUAL;
            }
            break;
        }
        break;
    case '<':
        switch (c2) {
        case '<':
            switch (c3) {
            case '=': return LEFTSHIFTEQUAL;
            }
            break;
        }
        break;
    case '>':
        switch (c2) {
        case '>':
            switch (c3) {
            case '=': return RIGHTSHIFTEQUAL;
            }
            break;
        }
        break;
    }
    return OP;
}
python3词法分析(一)词法单元相关推荐
- python3 %%time 表示执行单元格时间 时间指的是CPU时间
python3 %%time 表示执行单元格时间 时间指的是CPU时间 举例 %%time from sagemaker.pytorch import PyTorch from sagemaker.p ...
- ebnf范式_使用Scala基于词法单元的解析器定制EBNF范式文法解析
前言 近期在做Oracle迁移到Spark平台的项目上遇到了一些平台公式翻译为SparkSQL(on Hive)的需求,而Spark采用亲妈语言Scala进行开发.下面是个意外,被论文查重了,移步至我 ...
- 让解析器可以快速处理词法单元之间的空格
2019独角兽企业重金招聘Python工程师标准>>> 空格在字符串中时必要的字符,如果在字符串解析中,空格时必须要处理的. rules / tokens 分别定义区分空格的 rul ...
- python翻译matlab,如何在python3中翻译MATLAB单元?
使用我在链接文章中演示的Octave/scipy save/loadmat: 在八度音阶中>> num_nodes=3 num_nodes = 3 >> num_nodes=3 ...
- python3词法分析(三)识别token
python3.10.2 主要分析Parser/tokenizer.c的tok_get函数 一.预处理行首 1.跳过空白字符 1.1.1 空格.Tab.\014 static int tok_get( ...
- 编译原理 - 词法分析
词法分析 词法分析器 作用 编译过程划分为词法分析和语法分析两个阶段的原因 语法分析中的三个概念 词法分析的实现 如何区分兼容性的标识符 词法分析算法 词法单元 词法单元例子 词法单元的模式 正则表达 ...
- Google V8引擎浅析
前端开发人员都会遇到一个流行词:V8.它的流行程度很大一部分是因为它将JavaScript的性能提升到了一个新的水平.是的,V8很快.但它是如何发挥它的魔力? 前言 源代码:https://sourc ...
- go string 换行_从词法分析角度聊 Go 代码组成
之前的 Go 笔记系列,已经完成到了开发环境搭建,原本接下来的计划就是到语法部分了,但后来一直没有前进.主要是因为当时的工作比较忙,分散了精力,于是就暂时放下了. 最近,准备重新把之前计划捡起来. 第 ...
- php 词法分析,【PHP7源码学习】2019-03-20 PHP词法分析
baiyan 基本概念 在PHP7中,当一个脚本运行请求或到来时,PHP代码首先会被加载到内存中,随后进行词法分析和语法分析并生成抽象语法树(AST),然后进行深度优先遍历并生成opcodes,并在z ...
最新文章
- Java中常见数据结构Map之HashMap
- 无节操cocos2d-js游戏
- 四、【线性表】线性表的顺序表示和实现
- Windows.form增删改查
- linux把硬盘当内存,把内存当硬盘使,让你的linux程序运转如飞(在linux下用firefox在线写csdn的blog再也不卡了)...
- Win7电脑,无法把文件保存到桌面上?
- geolocation/ 百度地图api Geolocation 定位当前城市信息
- python classmethod函数_在python中使用与instance和classmethod相同的函数
- 飞机上终于能开着手机连 Wi-Fi 了,它背后的技术原理是什么?
- javascript 中的prompt 用法
- 3D建模设计软件Rhino 7 for Mac
- Windows活动目录(域服务器)经典系列图文教程
- Visual Studio 2019密钥
- 别让这个时代越来越冷漠
- 多功能AD杀手AD7124 效果理想 原理图经验分享+实物图
- 宏基4752g linux驱动下载,宏碁笔记本及应用程序驱动下载_硬件驱动下载
- tcpdf不支持html,TCPDF:无法从HTML输入生成PDF文件
- 仙剑奇侠传五破解方法(虚拟机版)
- 三消类游戏的核心算法
- 大数据开发的面试总结