@@ -4,42 +4,44 @@ import "core:fmt"
44import " core:os"
55import " core:strings"
66
7- OPCODE_MASK :: 0b11111100
8- D_MASK :: 0b00000010
9- W_MASK :: 0b00000001
10- MOD_MASK :: 0b11000000
11- REG_MASK :: 0b00111000
12- RM_MASK :: 0b00000111
13-
14- OPCODE_OFFSET :: 2
// Bit masks and shift offsets for the two leading bytes of a
// register/memory-to/from-register MOV:
//   byte 0: OPCODE(6) D(1) W(1)
//   byte 1: MOD(2) REG(3) R/M(3)
// NOTE: remove constants as these patterns differ from case to case
OPCODE_MASK :: 0b11111100
D_MASK      :: 0b00000010
W_MASK      :: 0b00000001
MOD_MASK    :: 0b11000000
REG_MASK    :: 0b00111000
RM_MASK     :: 0b00000111

OPCODE_OFFSET :: 2
D_OFFSET      :: 1
MOD_OFFSET    :: 6
REG_OFFSET    :: 3
RM_OFFSET     :: 0

// R/M value that is a special case when MOD = 00: instead of a register-based
// effective address it selects a direct address given by a 16-bit displacement.
// NOTE(review): despite the name, 0b110 is not the accumulator's register code
// (AX/AL encode as 0b000); the identifier is kept so existing references still compile.
AX :: 0b110
1822
19- OPCODE :: enum {
20- MOV = 0b100010
// Opcode prefixes of the supported MOV forms. The prefixes have different
// widths, so each one must be compared against the opcode byte shifted right
// by (8 - prefix width) bits.
OPCODES :: enum {
	REG_RM        = 0b100010,  // 100010dw : register/memory to/from register (6-bit prefix)
	IMMEDIATE_RM  = 0b1100011, // 1100011w : immediate to register/memory (7-bit prefix)
	IMMEDIATE_REG = 0b1011,    // 1011wreg : immediate to register (4-bit prefix)
}
2228
23- REG_LH :: enum {
24- AL = 0b000 ,
25- CL = 0b001 ,
26- DL = 0b010 ,
27- BL = 0b011 ,
28- AH = 0b100 ,
29- CH = 0b101 ,
30- DH = 0b110 ,
31- BH = 0b111
29+
// Values of the two-bit MOD field found in the top bits of the second
// instruction byte (see MOD_MASK / MOD_OFFSET).
// NOTE(review): per the 8086 encoding tables, 01 is followed by an 8-bit
// displacement and 10 by a 16-bit displacement — confirm the DISP_LO / DISP_HI
// names are meant to express that.
MOD :: enum {
	DISP_NO = 0b00,
	DISP_LO = 0b01,
	DISP_HI = 0b10,
	REG     = 0b11,
}
3336
34- REG_X :: enum {
35- AX = 0b000 ,
36- CX = 0b001 ,
37- DX = 0b010 ,
38- BX = 0b011 ,
39- SP = 0b100 ,
40- BP = 0b101 ,
41- SI = 0b110 ,
42- DI = 0b111
// Returns the assembly name of the register selected by a 3-bit register code.
//
// reg_code: register field value; only the low three bits are significant,
//           higher bits are ignored so a stray bit can never select a name
//           from the wrong half of the table or index past its end.
// w:        true selects the 16-bit register set (ax..di),
//           false the 8-bit set (al..bh).
parse_reg :: proc(reg_code: u8, w: bool) -> string {
	// First eight entries are the byte registers, last eight the word registers.
	reg_names := [16]string{
		"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh",
		"ax", "cx", "dx", "bx", "sp", "bp", "si", "di",
	}

	// Widen before adding the table offset so the arithmetic cannot wrap in u8.
	index := int(reg_code & 0b111)
	if w {
		index += 8
	}
	return reg_names[index]
}
4446
4547read_instructions :: proc (path: string ) -> []u8 {
@@ -59,72 +61,85 @@ read_instructions :: proc(path: string) -> []u8 {
5961 return buffer[:bytes]
6062}
6163
62- parse_instruction :: proc (data: []u8 , index: int ) -> (u8 , u8 , u8 , u8 , u8 , u8 ){
63- opcode_byte := data[index]
64- modrm_byte := data[index+1 ]
64+ iterator_limit: int = 20
6565
66- opcode := (opcode_byte & OPCODE_MASK) >> OPCODE_OFFSET
67- d := (opcode_byte & D_MASK) >> D_OFFSET
68- w := (opcode_byte & W_MASK)
69- mod := (modrm_byte & MOD_MASK) >> MOD_OFFSET
70- reg := (modrm_byte & REG_MASK) >> REG_OFFSET
71- rm := (modrm_byte & RM_MASK)
// Decodes a buffer of 8086 machine code into NASM-style assembly text.
//
// Supported forms:
//   - register-to-register MOV (100010dw with MOD = 11)
//   - immediate-to-register MOV (1011wreg, 8- or 16-bit immediate)
//
// data: raw instruction bytes.
//
// Returns the listing, starting with a "Bits 16" header and followed by one
// line per decoded instruction. Decoding stops at the first byte sequence that
// is unsupported or cut short by the end of the buffer; a ';' comment line
// describing the problem is appended in that case. The returned string is
// backed by a freshly allocated buffer owned by the caller.
parse_instructions :: proc(data: []u8) -> string {
	builder := strings.builder_make()
	strings.write_string(&builder, "Bits 16\n\n")
	data_len := len(data)
	fmt.printfln("%b", data)

	// Advance by the size of each decoded instruction until the buffer is exhausted.
	for i := 0; i < data_len; {
		opcode_byte := data[i]
		mnemonic := "mov"

		is_reg_rm := opcode_byte >> 2 == u8(OPCODES.REG_RM)
		is_immediate_reg := opcode_byte >> 4 == u8(OPCODES.IMMEDIATE_REG)
		if !is_reg_rm && !is_immediate_reg {
			fmt.sbprintf(&builder, "; unsupported opcode byte 0x%02x at offset %d\n", opcode_byte, i)
			break
		}

		// Both forms are two bytes long, except the immediate form with W set,
		// which carries a 16-bit immediate and is therefore three bytes long.
		size := 2
		if is_immediate_reg && opcode_byte & 0b00001000 != 0 {
			size = 3
		}
		if i + size > data_len {
			fmt.sbprintf(&builder, "; truncated instruction at offset %d\n", i)
			break
		}

		if is_reg_rm {
			modrm_byte := data[i + 1]
			w := opcode_byte & W_MASK != 0
			d := opcode_byte & D_MASK != 0

			mod_bits := (modrm_byte & MOD_MASK) >> MOD_OFFSET
			if mod_bits != u8(MOD.REG) {
				// Memory operands need effective-address decoding and extra
				// displacement bytes; printing R/M as a register would be wrong.
				fmt.sbprintf(&builder, "; unsupported addressing mode (mod=%02b) at offset %d\n", mod_bits, i)
				break
			}

			reg := (modrm_byte & REG_MASK) >> REG_OFFSET
			rm := modrm_byte & RM_MASK

			// D = 1 means the REG field is the destination, otherwise it is the source.
			dest := parse_reg(rm, w)
			src := parse_reg(reg, w)
			if d {
				dest, src = src, dest
			}
			fmt.sbprintf(&builder, "%s %s, %s\n", mnemonic, dest, src)
		} else {
			w := size == 3
			reg := opcode_byte & 0b00000111

			// The immediate is stored little-endian and printed as a signed value.
			immediate: int
			if w {
				immediate = parse_sign_u16(concat_bits(data[i + 1], data[i + 2]))
			} else {
				immediate = parse_sign_u8(data[i + 1])
			}
			fmt.sbprintf(&builder, "%s %s, %d\n", mnemonic, parse_reg(reg, w), immediate)
		}

		i += size
	}

	return strings.to_string(builder)
}
114126
115- format_instructions :: proc (data: []u8 ) -> string {
116- result := strings.Builder{}
117- defer strings.builder_destroy (&result)
118-
119- str := " bits 16\n\n "
// Interprets a byte as an 8-bit two's-complement number and widens it to int.
// Values with the top bit (0x80) set come out negative, all others unchanged.
parse_sign_u8 :: proc(value: u8) -> int {
	// Reinterpreting the bits as i8 and then widening performs the sign
	// extension; for values below 0x80 this is the same as a plain widening.
	signed := i8(value)
	return int(signed)
}
120134
121- for i := 0 ; i < len (data) - 1 ; i += 2 {
122- opcode, d, w, mod, reg, rm := parse_instruction (data, i)
123- opcode_str := opcode_to_string (opcode)
124- source := reg_to_string (reg, w)
125- dest := reg_to_string (rm, w)
126- line := fmt.aprintf (" %s %s, %s\n " , opcode_str, dest, source)
127- str = strings.concatenate ({str, line})
// Interprets a 16-bit word as a two's-complement number and widens it to int.
// Values with the top bit (0x8000) set come out negative, all others unchanged.
parse_sign_u16 :: proc(value: u16) -> int {
	// Reinterpreting the bits as i16 and then widening performs the sign
	// extension; for values below 0x8000 this is the same as a plain widening.
	signed := i16(value)
	return int(signed)
}
142+
// Combines two bytes into one 16-bit word: high_byte supplies bits 15..8 and
// low_byte supplies bits 7..0.
concat_bits :: proc(low_byte: u8, high_byte: u8) -> u16 {
	upper := u16(high_byte) << 8
	lower := u16(low_byte)
	return upper | lower
}
0 commit comments