Skip to content

Commit 6e4d19b

Browse files
authored
Merge pull request #1 from oracle58/dev
[0039] Halfway there
2 parents 4683bd5 + 485d2d6 commit 6e4d19b

File tree

5 files changed

+106
-168
lines changed

5 files changed

+106
-168
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ e.g. `mov cx, bx`: `cx` = dest -> d=0 because CX sits in the r/m field.
1313

1414
**\*\*** Alternator for `r/m`: depending on `mod`, the last 3 bits hold an encoded reference to either a register or a memory address.
1515

16-
- `opcode`: e.g. 100010 or 1011 w reg
16+
- `opcode`: e.g. `100010dw` (register/memory to/from register), or `1011wreg` for immediate-to-register moves
1717
- `mod` (displacement mode): selects whether `r/m` refers to a register (`r`) or to memory (`m`), and enables use of `disp_low` and `disp_high`.
1818
- 00 -> [...]
1919
- 11 -> r/m = reg

decode/decode_8086.odin

Lines changed: 100 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -4,42 +4,44 @@ import "core:fmt"
44
import "core:os"
55
import "core:strings"
66

7-
OPCODE_MASK :: 0b11111100
8-
D_MASK :: 0b00000010
9-
W_MASK :: 0b00000001
10-
MOD_MASK :: 0b11000000
11-
REG_MASK :: 0b00111000
12-
RM_MASK :: 0b00000111
13-
14-
OPCODE_OFFSET :: 2
7+
//NOTE: remove constants as these patterns differ from case to case
8+
OPCODE_MASK :: 0b11111100
9+
D_MASK :: 0b00000010
10+
W_MASK :: 0b00000001
11+
MOD_MASK :: 0b11000000
12+
REG_MASK :: 0b00111000
13+
RM_MASK :: 0b00000111
14+
15+
OPCODE_OFFSET :: 2
1516
D_OFFSET :: 1
1617
MOD_OFFSET :: 6
1718
REG_OFFSET :: 3
19+
RM_OFFSET :: 0
20+
21+
AX :: 0b110 //NOTE: r/m = 110 is a special case: with mod=00 it means a direct address with a 16-bit displacement. Despite the name, this is not the accumulator encoding (AX is reg 000).
1822

19-
OPCODE :: enum {
20-
MOV = 0b100010
23+
OPCODES :: enum {
24+
REG_RM = 0b100010,
25+
IMMEDIATE_RM = 0b110011,
26+
IMMEDIATE_REG = 0b1011,
2127
}
2228

23-
REG_LH :: enum{
24-
AL = 0b000,
25-
CL = 0b001,
26-
DL = 0b010,
27-
BL = 0b011,
28-
AH = 0b100,
29-
CH = 0b101,
30-
DH = 0b110,
31-
BH = 0b111
29+
30+
MOD :: enum {
31+
DISP_NO = 0b00,
32+
DISP_LO = 0b01,
33+
DISP_HI = 0b10,
34+
REG = 0b11
3235
}
3336

34-
REG_X :: enum {
35-
AX = 0b000,
36-
CX = 0b001,
37-
DX = 0b010,
38-
BX = 0b011,
39-
SP = 0b100,
40-
BP = 0b101,
41-
SI = 0b110,
42-
DI = 0b111
37+
parse_reg :: proc(reg_code: u8, w: bool) -> string {
38+
reg_names: [16]string = {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh",
39+
"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"}
40+
if w {
41+
return reg_names[reg_code + 8]
42+
} else {
43+
return reg_names[reg_code]
44+
}
4345
}
4446

4547
read_instructions :: proc(path: string) -> []u8 {
@@ -59,72 +61,85 @@ read_instructions :: proc(path: string) -> []u8 {
5961
return buffer[:bytes]
6062
}
6163

62-
parse_instruction :: proc (data: []u8, index: int) -> (u8, u8, u8, u8, u8, u8){
63-
opcode_byte := data[index]
64-
modrm_byte := data[index+1]
64+
iterator_limit: int = 20
6565

66-
opcode := (opcode_byte & OPCODE_MASK) >> OPCODE_OFFSET
67-
d := (opcode_byte & D_MASK) >> D_OFFSET
68-
w := (opcode_byte & W_MASK)
69-
mod := (modrm_byte & MOD_MASK) >> MOD_OFFSET
70-
reg := (modrm_byte & REG_MASK) >> REG_OFFSET
71-
rm := (modrm_byte & RM_MASK)
66+
parse_instructions :: proc(data: []u8) -> string {
67+
decoded_str := "Bits 16\n\n"
68+
data_len := len(data)
69+
fmt.printfln("%b", data)
7270

73-
return opcode, d, w, mod, reg, rm
74-
}
71+
//TODO: need to iterate by instruction size
72+
for i:=0; i < iterator_limit; {
73+
opcode_byte := data[i]
74+
75+
mnemonic := "mov"
76+
formatted_instruction: string
77+
dest: string
78+
src: string
79+
80+
w: bool
81+
d: bool
82+
reg: u8
83+
rm: u8
84+
size:int = 2
7585

76-
opcode_to_string :: proc(opcode_byte: u8) -> string {
77-
switch opcode_byte {
78-
case u8(OPCODE.MOV):
79-
return "mov"
80-
case:
81-
panic("unrecognized opcode")
82-
}
83-
}
86+
if (opcode_byte >> 2 == u8(OPCODES.REG_RM)) {
87+
88+
data_byte := data[i + 1]
89+
90+
w = (opcode_byte & 0b00000001) != 0
91+
d = (opcode_byte & 0b00000010) != 0
8492

85-
reg_to_string :: proc(reg_byte: u8, w: u8) -> string {
86-
if w == 1 {
87-
switch reg_byte {
88-
case u8(REG_X.AX): return "ax"
89-
case u8(REG_X.CX): return "cx"
90-
case u8(REG_X.DX): return "dx"
91-
case u8(REG_X.BX): return "bx"
92-
case u8(REG_X.SP): return "sp"
93-
case u8(REG_X.BP): return "bp"
94-
case u8(REG_X.SI): return "si"
95-
case u8(REG_X.DI): return "di"
96-
case:
97-
panic("unrecognized registry encoding")
93+
reg = (data_byte & REG_MASK) >> REG_OFFSET
94+
rm = (data_byte & RM_MASK)
95+
mod := (data_byte & MOD_MASK) >> MOD_OFFSET
96+
if (d) {
97+
dest = parse_reg(reg, w)
98+
src = parse_reg(rm, w)
99+
} else {
100+
dest = parse_reg(rm, w)
101+
src = parse_reg(reg, w)
102+
}
98103
}
99-
} else {
100-
switch reg_byte {
101-
case u8(REG_LH.AL): return "al"
102-
case u8(REG_LH.CL): return "cl"
103-
case u8(REG_LH.DL): return "dl"
104-
case u8(REG_LH.BL): return "bl"
105-
case u8(REG_LH.AH): return "ah"
106-
case u8(REG_LH.CH): return "ch"
107-
case u8(REG_LH.DH): return "dh"
108-
case u8(REG_LH.BH): return "bh"
109-
case:
110-
panic("unrecognized registry encoding")
104+
else if (opcode_byte >> 4 == u8(OPCODES.IMMEDIATE_REG)) {
105+
106+
w = (opcode_byte & 0b00001000) != 0
107+
reg = (opcode_byte & 0b00000111)
108+
dest = parse_reg(reg, w)
109+
if w {
110+
size = 3
111+
low_byte := data[i + 1] // data (low byte of the immediate)
112+
high_byte := data[i + 2] // data (high byte of the immediate, present when w = 1)
113+
value := concat_bits(low_byte, high_byte)
114+
src = fmt.aprintf("%d", parse_sign_u16(value))
115+
} else {
116+
data_byte := data[i + 1]
117+
src = fmt.aprintf("%d", parse_sign_u8(data_byte))
118+
}
111119
}
120+
formatted_instruction = fmt.aprintf("%s %s, %s \n", mnemonic, dest, src)
121+
decoded_str = strings.concatenate({decoded_str, formatted_instruction})
122+
i+=size
112123
}
124+
return decoded_str
113125
}
114126

115-
format_instructions :: proc(data: []u8) -> string {
116-
result := strings.Builder{}
117-
defer strings.builder_destroy(&result)
118-
119-
str := "bits 16\n\n"
127+
parse_sign_u8 :: proc(value: u8) -> int {
128+
if value & 0x80 != 0 { // Check if the sign bit (MSB, bit 7 counting from 0) is set
129+
return int(i8(value))
130+
} else {
131+
return int(value)
132+
}
133+
}
120134

121-
for i := 0; i < len(data) - 1; i += 2 {
122-
opcode, d, w, mod, reg, rm := parse_instruction(data, i)
123-
opcode_str := opcode_to_string(opcode)
124-
source := reg_to_string(reg, w)
125-
dest := reg_to_string(rm, w)
126-
line := fmt.aprintf("%s %s, %s\n", opcode_str, dest, source)
127-
str = strings.concatenate({str, line})
135+
parse_sign_u16 :: proc(value: u16) -> int {
136+
if value & 0x8000 != 0 { // Check if the sign bit (MSB, bit 15 counting from 0) is set
137+
return int(i16(value))
138+
} else {
139+
return int(value)
128140
}
129-
return str
130141
}
142+
143+
concat_bits :: proc(low_byte: u8, high_byte: u8, ) -> u16 {
144+
return u16(low_byte) | u16(high_byte) << 8
145+
}

main.odin

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,11 @@ import "core:fmt"
44
import "decode"
55

66
main :: proc() {
7-
single_path := "./samples/listing_0037_single_register_mov"
8-
many_path := "./samples/listing_0038_many_register_mov"
7+
exercise1 := "./samples/listing_0037_single_register_mov"
8+
exercise1_bonus := "./samples/listing_0038_many_register_mov"
9+
exercise2 := "./samples/listing_0039_more_movs"
910

10-
data := decode.read_instructions(many_path)
11-
instructions := decode.format_instructions(data)
11+
data := decode.read_instructions(exercise2)
12+
instructions := decode.parse_instructions(data)
1213
fmt.printfln(instructions)
1314
}

samples/listing_0039_more_movs

41 Bytes
Binary file not shown.

tests/test_decode.odin

Lines changed: 0 additions & 78 deletions
This file was deleted.

0 commit comments

Comments
 (0)