x86: move the bytecode definition into a separate file in x86/

At least three files (asm/assemble.c, disasm/disasm.c, and x86/insns.pl) depend on the bytecode definitions. It makes a lot more sense for them to live in an explicit documentation file in the x86/ directory. Signed-off-by: H. Peter Anvin <[email protected]>
netwide-assembler · Jul 23, 2024 · 49640ed · 49640ed
1 parent 77df155
commit 49640ed
Show file tree

Hide file tree

Showing 4 changed files with 152 additions and 137 deletions.
diff --git a/asm/assemble.c b/asm/assemble.c
@@ -34,143 +34,6 @@
 /*
  * assemble.c   code generation for the Netwide Assembler
  *
- * Bytecode specification
- * ----------------------
- *
- *
- * Codes            Mnemonic        Explanation
- *
- * \0                                       terminates the code. (Unless it's a literal of course.)
- * \1..\4                                   that many literal bytes follow in the code stream
- * \5                                       add 4 to the primary operand number (b, low octdigit)
- * \6                                       add 4 to the secondary operand number (a, middle octdigit)
- * \7                                       add 4 to both the primary and the secondary operand number
- * \10..\13                                 a literal byte follows in the code stream, to be added
- *                                          to the register value of operand 0..3
- * \14..\17                                 the position of index register operand in MIB (BND insns)
- * \20..\23         ib                      a byte immediate operand, from operand 0..3
- * \24..\27         ib,u                    a zero-extended byte immediate operand, from operand 0..3
- * \30..\33         iw                      a word immediate operand, from operand 0..3
- * \34..\37         iwd                     select between \3[0-3] and \4[0-3] depending on 16/32 bit
- *                                          assembly mode or the operand-size override on the operand
- * \40..\43         id                      a long immediate operand, from operand 0..3
- * \44..\47         iwdq                    select between \3[0-3], \4[0-3] and \5[4-7]
- *                                          depending on the address size of the instruction.
- * \50..\53         rel8                    a byte relative operand, from operand 0..3
- * \54..\57         iq                      a qword immediate operand, from operand 0..3
- * \60..\63         rel16                   a word relative operand, from operand 0..3
- * \64..\67         rel                     select between \6[0-3] and \7[0-3] depending on 16/32 bit
- *                                          assembly mode or the operand-size override on the operand
- * \70..\73         rel32                   a long relative operand, from operand 0..3
- * \74..\77         seg                     a word constant, from the _segment_ part of operand 0..3
- * \1ab             /r                      a ModRM, calculated on EA in operand a, with the reg
- *                                          field the register value of operand b.
- * \171\mab         /mrb (e.g /3r0)         a ModRM, with the reg field taken from operand a, and the m
- *                                          and b fields set to the specified values.
- * \172\ab          /is4                    the register number from operand a in bits 7..4, with
- *                                          the 4-bit immediate from operand b in bits 3..0.
- * \173\xab                                 the register number from operand a in bits 7..4, with
- *                                          the value b in bits 3..0.
- * \174..\177                               the register number from operand 0..3 in bits 7..4, and
- *                                          an arbitrary value in bits 3..0 (assembled as zero.)
- * \2ab             /b                      a ModRM, calculated on EA in operand a, with the reg
- *                                          field equal to digit b.
- * \240..\243                               this instruction uses EVEX rather than REX or VEX/XOP, with the
- *                                          V field taken from operand 0..3.
- * \250                                     this instruction uses EVEX rather than REX or VEX/XOP, with the
- *                                          V field set to 1111b.
- *
- * EVEX prefixes are followed by the sequence:
- * \cm\wlp\tup    where cm is:
- *                  cc 00m mmm
- *                  c = 2 for EVEX and mmmm is the M field (EVEX.P0[3:0])
- *                and wlp is:
- *                  00 wwl lpp
- *                  [l0]  ll = 0 (.128, .lz)
- *                  [l1]  ll = 1 (.256)
- *                  [l2]  ll = 2 (.512)
- *                  [lig] ll = 3 for EVEX.L'L don't care (always assembled as 0)
- *
- *                  [w0]  ww = 0 for W = 0
- *                  [w1]  ww = 1 for W = 1
- *                  [wig] ww = 2 for W don't care (always assembled as 0)
- *                  [ww]  ww = 3 for W used as REX.W
- *
- *                  [p0]  pp = 0 for no prefix
- *                  [66]  pp = 1 for legacy prefix 66
- *                  [f3]  pp = 2
- *                  [f2]  pp = 3
- *
- *                tup is tuple type for Disp8*N from %tuple_codes in insns.pl
- *                    (compressed displacement encoding)
- *
- * \254..\257       id,s                    a signed 32-bit operand to be extended to 64 bits.
- * \260..\263                               this instruction uses VEX/XOP rather than REX, with the
- *                                          V field taken from operand 0..3.
- * \270                                     this instruction uses VEX/XOP rather than REX, with the
- *                                          V field set to 1111b.
- * VEX/XOP prefixes are followed by the sequence:
- * \tmm\wlp        where mm is the M field; and wlp is:
- *                 00 wwl lpp
- *                 [l0]  ll = 0 for L = 0 (.128, .lz)
- *                 [l1]  ll = 1 for L = 1 (.256)
- *                 [lig] ll = 2 for L don't care (always assembled as 0)
- *
- *                 [w0]  ww = 0 for W = 0
- *                 [w1]  ww = 1 for W = 1
- *                 [wig] ww = 2 for W don't care (always assembled as 0)
- *                 [ww]  ww = 3 for W used as REX.W
- *
- * t = 0 for VEX (C4/C5), t = 1 for XOP (8F).
- *
- * \271             hlexr                       instruction takes XRELEASE (F3) with or without lock
- * \272             hlenl                       instruction takes XACQUIRE/XRELEASE with or without lock
- * \273             hle                         instruction takes XACQUIRE/XRELEASE with lock only
- * \274..\277       ib,s                        a byte immediate operand, from operand 0..3, sign-extended
- *                                              to the operand size (if o16/o32/o64 present) or the bit size
- * \310             a16                         indicates fixed 16-bit address size, i.e. optional 0x67.
- * \311             a32                         indicates fixed 32-bit address size, i.e. optional 0x67.
- * \312             adf                         (disassembler only) invalid with non-default address size.
- * \313             a64                         indicates fixed 64-bit address size, 0x67 invalid.
- * \314             norexb                      (disassembler only) invalid with REX.B
- * \315             norexx                      (disassembler only) invalid with REX.X
- * \316             norexr                      (disassembler only) invalid with REX.R
- * \317             norexw                      (disassembler only) invalid with REX.W
- * \320             o16                         indicates fixed 16-bit operand size, i.e. optional 0x66.
- * \321             o32                         indicates fixed 32-bit operand size, i.e. optional 0x66.
- * \322             odf                         indicates that this instruction is only valid when the
- *                                              operand size is the default (instruction to disassembler,
- *                                              generates no code in the assembler)
- * \323             o64nw                       indicates fixed 64-bit operand size, REX on extensions only.
- * \324             o64                         indicates 64-bit operand size requiring REX prefix.
- * \325             nohi                        instruction which always uses spl/bpl/sil/dil
- * \326             nof3                        instruction not valid with 0xF3 REP prefix.  Hint for
- *                                              disassembler only; for SSE instructions.
- * \331             norep                       instruction not valid with REP prefix.  Hint for
- *                                              disassembler only; for SSE instructions.
- * \332             f2i                         REP prefix (0xF2 byte) used as opcode extension.
- * \333             f3i                         REP prefix (0xF3 byte) used as opcode extension.
- * \334             rex.l                       LOCK prefix used as REX.R (used in non-64-bit mode)
- * \335             repe                        disassemble a rep (0xF3 byte) prefix as repe not rep.
- * \336             mustrep                     force a REP(E) prefix (0xF3) even if not specified.
- * \337             mustrepne                   force a REPNE prefix (0xF2) even if not specified.
- *                                              \336-\337 are still listed as prefixes in the disassembler.
- * \340             resb                        reserve <operand 0> bytes of uninitialized storage.
- *                                              Operand 0 had better be a segmentless constant.
- * \341             wait                        this instruction needs a WAIT "prefix"
- * \360             np                          no SSE prefix (== \364\331)
- * \361                                         66 SSE prefix (== \366\331)
- * \364             !osp                        operand-size prefix (0x66) not permitted
- * \365             !asp                        address-size prefix (0x67) not permitted
- * \366                                         operand-size prefix (0x66) used as opcode extension
- * \367                                         address-size prefix (0x67) used as opcode extension
- * \370,\371        jcc8                        match only if operand 0 meets byte jump criteria.
- *                  jmp8                        370 is used for Jcc, 371 is used for JMP.
- * \373             jlen                        assemble 0x03 if bits==16, 0x05 if bits==32;
- *                                              used for conditional jump over longer jump
- * \374             vsibx|vm32x|vm64x           this instruction takes an XMM VSIB memory EA
- * \375             vsiby|vm32y|vm64y           this instruction takes a YMM VSIB memory EA
- * \376             vsibz|vm32z|vm64z           this instruction takes a ZMM VSIB memory EA
  */
 
 #include "compiler.h"

diff --git a/disasm/disasm.c b/disasm/disasm.c
@@ -33,6 +33,9 @@
 
 /*
  * disasm.c   where all the _work_ gets done in the Netwide Disassembler
+ *
+ * See x86/bytecode.txt for the definition of the instruction encoding
+ * byte codes.
  */
 
 #include "compiler.h"