#!/bin/awk -f
# SPDX-License-Identifier: GPL-2.0
# gen-insn-attr-x86.awk: Instruction attribute table generator
# Written by Masami Hiramatsu <mhiramat@redhat.com>
#
# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c

# Awk implementation sanity check.
# Returns "" when sprintf("%x", 0) yields "0", otherwise a message that
# describes the problem.
function check_awk_implement() {
	return (sprintf("%x", 0) == "0") ? "" : "Your awk has a printf-format problem."
}

# Reset the per-table working state: the current table name, the escape /
# group / AVX ids of the table being read, the attribute table itself and
# its three last-prefix variant tables.
function clear_vars() {
	tname = ""
	aid = -1 # AVX id
	gid = -1 # group id
	eid = -1 # escape id
	delete lptable3
	delete lptable2
	delete lptable1
	delete table
}

BEGIN {
	# Refuse to run when the awk implementation fails the sanity check
	awkchecked = check_awk_implement()
	if (awkchecked != "") {
		printf "Error: %s\n", awkchecked > "/dev/stderr"
		printf "Please try to use gawk.\n" > "/dev/stderr"
		exit 1
	}

	# Banner of the generated file
	print "/* x86 opcode map generated from x86-opcode-map.txt */"
	print "/* Do not change this code. */\n"

	# Global id counters and the per-kind arrays of emitted table names
	geid = 1
	ggid = 1
	gaid = 0
	delete atable
	delete gtable
	delete etable

	# Token classifiers used while parsing an opcode line
	group_expr = "^Grp[0-9A-Za-z]+"
	sep_expr = "^\\|$"
	ext_expr = "^\\("
	opnd_expr = "^[A-Za-z/]"

	# Immediate operands and the attribute each one maps to
	imm_expr = "^[IJAOL][a-z]"
	imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
	imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)"
	imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)"
	imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)"
	imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
	imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
	imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
	imm_flag["Ob"] = "INAT_MOFFSET"
	imm_flag["Ov"] = "INAT_MOFFSET"

	# Operand / opcode / superscript patterns that add a single attribute
	modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
	force64_expr = "\\([df]64\\)"
	rex_expr = "^REX(\\.[XRWB]+)*"
	fpu_expr = "^ESC" # TODO

	# Last-prefix superscripts: lprefixN_expr routes an opcode into variant
	# table N, lprefix_expr decides whether it stays out of the base table.
	# max_lprefix bounds the variant index when END emits the lookup arrays.
	lprefix1_expr = "\\((66|!F3)\\)"
	lprefix2_expr = "\\(F3\\)"
	lprefix3_expr = "\\((F2|!F3|66&F2)\\)"
	lprefix_expr = "\\((66|F2|F3)\\)"
	max_lprefix = 4

	# All opcodes starting with lower-case 'v', 'k' or with (v1) superscript
	# accept a VEX prefix
	vexok_opcode_expr = "^[vk].*"
	vexok_expr = "\\(v1\\)"
	# Opcodes with (v) superscript support *only* the VEX prefix
	vexonly_expr = "\\(v\\)"
	# Opcodes with (ev) superscript support *only* the EVEX prefix
	evexonly_expr = "\\(ev\\)"

	# Opcodes marked (Prefix) and the prefix attribute for each name
	prefix_expr = "\\(Prefix\\)"
	prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
	prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
	prefix_num["REPNE"] = "INAT_PFX_REPNE"
	prefix_num["XACQUIRE"] = "INAT_PFX_REPNE"
	prefix_num["REP/REPE"] = "INAT_PFX_REPE"
	prefix_num["XRELEASE"] = "INAT_PFX_REPE"
	prefix_num["LOCK"] = "INAT_PFX_LOCK"
	prefix_num["SEG=CS"] = "INAT_PFX_CS"
	prefix_num["SEG=DS"] = "INAT_PFX_DS"
	prefix_num["SEG=ES"] = "INAT_PFX_ES"
	prefix_num["SEG=FS"] = "INAT_PFX_FS"
	prefix_num["SEG=GS"] = "INAT_PFX_GS"
	prefix_num["SEG=SS"] = "INAT_PFX_SS"
	prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
	prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
	prefix_num["EVEX"] = "INAT_PFX_EVEX"

	clear_vars()
}

# Report a fatal inconsistency in the opcode map on stderr and terminate
# with exit status 1.
#   msg: description of the problem; the current record number (NR) is
#        prepended so the offending map line can be located.
#
# "exit" issued while records are being processed still runs the END rule.
# awkchecked is the flag END tests before it emits anything, so it is set
# here to keep END from printing the lookup arrays after a failure.
function semantic_error(msg) {
	print "Semantic error at " NR ": " msg > "/dev/stderr"
	awkchecked = "Semantic error: " msg
	exit 1
}

# Print msg on standard output, tagged with "DEBUG: ".
function debug(msg) {
	printf "DEBUG: %s\n", msg
}

# Return the number of elements stored in arr.
# (key and total are locals, declared as extra parameters.)
function array_size(arr,   key,total) {
	total = 0
	for (key in arr)
		total += 1
	return total
}

# "Table:" line: echo it into the output as a C comment. A table name that
# is still set means the previous table was never closed by EndTable.
/^Table:/ {
	printf "/* %s */\n", $0
	if (tname != "") {
		semantic_error("Hit Table: before EndTable:.")
	}
}

# "Referrer: <escape name>" line: the table that follows is the target of an
# escape opcode. The name is every field after the first, joined without
# blanks, which is how the opcode-line rule builds the keys of escape[].
# A bare "Referrer:" leaves eid and tname untouched.
/^Referrer:/ {
	if (NF != 1) {
		# escape opcode table
		ref = ""
		for (i = 2; i <= NF; i++)
			ref = ref $i
		# An unknown name would otherwise give an empty eid and a
		# table called inat_escape_table_0 that no escape refers to.
		if (!(ref in escape))
			semantic_error("No escape: " ref)
		eid = escape[ref]
		tname = sprintf("inat_escape_table_%d", eid)
	}
}

# "AVXcode: [n]" line. With a number the table is tagged with AVX id n and
# gaid is raised so that it stays above the largest id seen. A table that has
# no name yet is an AVX-only table. With neither an AVX id nor an escape id
# the table is the primary opcode table.
/^AVXcode:/ {
	if (NF > 1) {
		# AVX/escape opcode table
		aid = $2
		if (aid >= gaid)
			gaid = aid + 1
		if (tname == "")	# AVX only opcode table
			tname = sprintf("inat_avx_table_%d", aid)
	}
	if (eid == -1 && aid == -1)	# primary opcode table
		tname = "inat_primary_table"
}

# "GrpTable: <name>" line: echo it as a C comment and switch to the group
# table whose id was assigned when an opcode line first used that name.
/^GrpTable:/ {
	printf "/* %s */\n", $0
	if (!($2 in group)) {
		semantic_error("No group: " $2 )
	}
	gid = group[$2]
	tname = sprintf("inat_group_table_%d", gid)
}

# Emit one attribute table as a C array with designated initializers.
#   tbl:  attribute strings, keyed by the index formatted with fmt
#   name: declarator text printed after "const insn_attr_t "
#   fmt:  sprintf format that turns a slot number into a key of tbl
#   n:    number of slots to scan (0 .. n-1); empty slots are skipped
# i and id are locals (declared as extra parameters) so that the loop cannot
# clobber the global i that the rules use as their field cursor.
function print_table(tbl,name,fmt,n,   i,id)
{
	print "const insn_attr_t " name " = {"
	for (i = 0; i < n; i++) {
		id = sprintf(fmt, i)
		if (tbl[id])
			print "\t[" id "] = " tbl[id] ","
	}
	print "};"
}

# Emit tbl as a group table and record its symbol in gtable[gid,k].
# Does nothing when tbl is empty. k is the last-prefix variant: 0 uses the
# plain table name, 1..3 append "_<k>".
function emit_group_table(tbl, k,    sym)
{
	if (array_size(tbl) == 0)
		return
	sym = (k == 0) ? tname : (tname "_" k)
	print_table(tbl, sym "[INAT_GROUP_TABLE_SIZE]", "0x%x", 8)
	gtable[gid,k] = sym
}

# Emit tbl as a primary/escape opcode table and record its symbol in
# etable[eid,k], and in atable[aid,k] as well when an AVX id is set.
# Does nothing when tbl is empty. k selects the name suffix as above.
function emit_opcode_table(tbl, k,    sym)
{
	if (array_size(tbl) == 0)
		return
	sym = (k == 0) ? tname : (tname "_" k)
	print_table(tbl, sym "[INAT_OPCODE_TABLE_SIZE]", "0x%02x", 256)
	etable[eid,k] = sym
	if (aid >= 0)
		atable[aid,k] = sym
}

# "EndTable" line: write out the base table and its last-prefix variants,
# follow them with an empty line and reset the working state.
/^EndTable/ {
	if (gid != -1) {
		# print group tables
		emit_group_table(table, 0)
		emit_group_table(lptable1, 1)
		emit_group_table(lptable2, 2)
		emit_group_table(lptable3, 3)
	} else {
		# print primary/escaped tables
		emit_opcode_table(table, 0)
		emit_opcode_table(lptable1, 1)
		emit_opcode_table(lptable2, 2)
		emit_opcode_table(lptable3, 3)
	}
	print ""
	clear_vars()
}

# Combine two attribute expressions with " | ".
# When either side is empty the other one is returned unchanged.
function add_flags(old,new) {
	if (!old)
		return new
	if (!new)
		return old
	return old " | " new
}

# Convert operands to flags.
#   count: number of entries in opnd
#   opnd:  operand names, indexed 1..count
# Returns the immediate attribute (if any) combined with INAT_MODRM (if any
# operand matches modrm_expr). A second immediate is accepted only when it
# is "Ib", and adds INAT_SCNDIMM.
function convert_operands(count,opnd,       tok,k,imm,mod)
{
	imm = ""
	mod = ""
	for (k = 1; k <= count; k++) {
		tok = opnd[k]
		if (tok !~ imm_expr) {
			# not an immediate: only ModRM operands matter
			if (tok ~ modrm_expr)
				mod = "INAT_MODRM"
			continue
		}
		if (!imm_flag[tok])
			semantic_error("Unknown imm opnd: " tok)
		if (!imm) {
			imm = imm_flag[tok]
			continue
		}
		if (tok != "Ib")
			semantic_error("Second IMM error")
		imm = add_flags(imm, "INAT_SCNDIMM")
	}
	return add_flags(imm, mod)
}

# Opcode line: "<hex>: opcode [operands] [(ext)] [| opcode ...]".
# Each alternative is turned into an attribute expression and stored in the
# base table and/or the last-prefix variant tables under the hex index.
/^[0-9a-f]+:/ {
	if (NR == 1)
		next

	# get index: the hex digits in front of the first colon
	idx = "0x" $1
	sub(/:.*$/, "", idx)
	if (idx in table)
		semantic_error("Redefine " idx " in " tname)

	# check if escaped opcode: "<hex>: escape # <name ...>"
	if ($2 == "escape") {
		if ($3 != "#")
			semantic_error("No escaped name")
		# the name is every remaining field joined without blanks
		ref = ""
		i = 4
		while (i <= NF)
			ref = ref $(i++)
		if (ref in escape)
			semantic_error("Redefine escape (" ref ")")
		escape[ref] = geid++
		table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")"
		next
	}

	variant = ""
	# converts: i is the field cursor, advanced inside the body
	for (i = 2; i <= NF; ) {
		opcode = $i
		i++
		delete opnds
		opnd = ""
		ext = ""
		flags = ""

		# parse one opcode: optional operand list ...
		if ($i ~ opnd_expr) {
			opnd = $i
			count = split(opnd, opnds, ",")
			flags = convert_operands(count, opnds)
			i++
		}
		# ... optional parenthesised superscript ...
		if ($i ~ ext_expr) {
			ext = $i
			i++
		}
		# ... and the separator before the next alternative
		if ($i ~ sep_expr)
			i++
		else if (i < NF)
			semantic_error($i " is not a separator")

		# check if group opcode; a new group name gets the next id
		if (opcode ~ group_expr) {
			if (!(opcode in group))
				group[opcode] = ggid++
			flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")")
		}

		# check force(or default) 64bit
		if (ext ~ force64_expr)
			flags = add_flags(flags, "INAT_FORCE64")

		# check REX prefix
		if (opcode ~ rex_expr)
			flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)")

		# check coprocessor escape : TODO
		if (opcode ~ fpu_expr)
			flags = add_flags(flags, "INAT_MODRM")

		# check VEX codes
		if (ext ~ evexonly_expr)
			flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY")
		else if (ext ~ vexonly_expr)
			flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
		else if ((ext ~ vexok_expr) || (opcode ~ vexok_opcode_expr))
			flags = add_flags(flags, "INAT_VEXOK")

		# check prefixes
		if (ext ~ prefix_expr) {
			if (!prefix_num[opcode])
				semantic_error("Unknown prefix: " opcode)
			flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")")
		}

		# nothing to record for this alternative
		if (flags == "")
			continue

		# check if last prefix: file the flags in every variant table
		# whose superscript pattern matches
		if (ext ~ lprefix1_expr) {
			lptable1[idx] = add_flags(lptable1[idx], flags)
			variant = "INAT_VARIANT"
		}
		if (ext ~ lprefix2_expr) {
			lptable2[idx] = add_flags(lptable2[idx], flags)
			variant = "INAT_VARIANT"
		}
		if (ext ~ lprefix3_expr) {
			lptable3[idx] = add_flags(lptable3[idx], flags)
			variant = "INAT_VARIANT"
		}
		# the base table gets the flags unless ext matches lprefix_expr
		if (ext !~ lprefix_expr)
			table[idx] = add_flags(table[idx], flags)
	}

	# mark the base entry when any alternative went to a variant table
	if (variant)
		table[idx] = add_flags(table[idx], variant)
}

# Print one "<lead>[r][c] = <symbol><tail>" line for every non-empty entry
# map[r,c] with 0 <= r < rows and 0 <= c < max_lprefix.
function emit_map_entries(map, rows, lead, tail,    r, c)
{
	for (r = 0; r < rows; r++)
		for (c = 0; c < max_lprefix; c++)
			if (map[r,c])
				print lead "[" r "][" c "] = " map[r,c] tail
}

# Emit the escape / group / AVX lookup arrays: as initialised const arrays
# when __BOOT_COMPRESSED is not defined, otherwise as plain static arrays
# that the generated inat_init_tables() fills in.
END {
	if (awkchecked != "")
		exit 1

	print "#ifndef __BOOT_COMPRESSED\n"

	# print escape opcode map's array
	print "/* Escape opcode map array */"
	print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1][INAT_LSTPFX_MAX + 1] = {"
	emit_map_entries(etable, geid, "\t", ",")
	print "};\n"

	# print group opcode map's array
	print "/* Group opcode map array */"
	print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1][INAT_LSTPFX_MAX + 1] = {"
	emit_map_entries(gtable, ggid, "\t", ",")
	print "};\n"

	# print AVX opcode map's array
	print "/* AVX opcode map array */"
	print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1][INAT_LSTPFX_MAX + 1] = {"
	emit_map_entries(atable, gaid, "\t", ",")
	print "};\n"

	print "#else /* !__BOOT_COMPRESSED */\n"

	print "/* Escape opcode map array */"
	print "static const insn_attr_t *inat_escape_tables[INAT_ESC_MAX + 1][INAT_LSTPFX_MAX + 1];\n"

	print "/* Group opcode map array */"
	print "static const insn_attr_t *inat_group_tables[INAT_GRP_MAX + 1][INAT_LSTPFX_MAX + 1];\n"

	print "/* AVX opcode map array */"
	print "static const insn_attr_t *inat_avx_tables[X86_VEX_M_MAX + 1][INAT_LSTPFX_MAX + 1];\n"

	print "static void inat_init_tables(void)"
	print "{"

	# print escape opcode map's array
	print "\t/* Print Escape opcode map array */"
	emit_map_entries(etable, geid, "\tinat_escape_tables", ";")
	print ""

	# print group opcode map's array
	print "\t/* Print Group opcode map array */"
	emit_map_entries(gtable, ggid, "\tinat_group_tables", ";")
	print ""

	# print AVX opcode map's array
	print "\t/* Print AVX opcode map array */"
	emit_map_entries(atable, gaid, "\tinat_avx_tables", ";")

	print "}"
	print "#endif"
}
