From 1051071202c2b2fc946a6778c91a8b93cd7e5d4d Mon Sep 17 00:00:00 2001 From: Jason Gross Date: Tue, 4 Mar 2025 22:05:12 -0800 Subject: [PATCH] Handle rip-relative addressing The idea to parse `rip` specially rather than as a register is due to Andres. --- src/Assembly/Equality.v | 17 ++++++++- src/Assembly/Parse.v | 80 +++++++++++++++++++++++++++-------------- src/Assembly/Syntax.v | 15 ++++++-- 3 files changed, 83 insertions(+), 29 deletions(-) diff --git a/src/Assembly/Equality.v b/src/Assembly/Equality.v index 336da900e8..9877763493 100644 --- a/src/Assembly/Equality.v +++ b/src/Assembly/Equality.v @@ -102,6 +102,20 @@ Proof. rewrite <- AccessSize_beq_eq; destruct (x =? y)%AccessSize; intuition con Global Instance AccessSize_beq_compat : Proper (eq ==> eq ==> eq) AccessSize_beq | 10. Proof. repeat intro; subst; reflexivity. Qed. +Declare Scope rip_relative_kind_scope. +Delimit Scope rip_relative_kind_scope with rip_relative_kind. +Bind Scope rip_relative_kind_scope with rip_relative_kind. + +Infix "=?" := rip_relative_kind_beq : rip_relative_kind_scope. + +Global Instance rip_relative_kind_beq_spec : reflect_rel (@eq rip_relative_kind) rip_relative_kind_beq | 10 + := reflect_of_beq internal_rip_relative_kind_dec_bl internal_rip_relative_kind_dec_lb. +Definition rip_relative_kind_beq_eq x y : (x =? y)%rip_relative_kind = true <-> x = y := conj (@internal_rip_relative_kind_dec_bl _ _) (@internal_rip_relative_kind_dec_lb _ _). +Lemma rip_relative_kind_beq_neq x y : (x =? y)%rip_relative_kind = false <-> x <> y. +Proof. rewrite <- rip_relative_kind_beq_eq; destruct (x =? y)%rip_relative_kind; intuition congruence. Qed. +Global Instance rip_relative_kind_beq_compat : Proper (eq ==> eq ==> eq) rip_relative_kind_beq | 10. +Proof. repeat intro; subst; reflexivity. Qed. + Declare Scope MEM_scope. Delimit Scope MEM_scope with MEM. Bind Scope MEM_scope with MEM. @@ -111,7 +125,8 @@ Definition MEM_beq (x y : MEM) : bool && option_beq String.eqb x.(mem_base_label) y.(mem_base_label) && (option_beq REG_beq x.(mem_base_reg) y.(mem_base_reg)) && (option_beq (prod_beq _ _ Z.eqb REG_beq) x.(mem_scale_reg) y.(mem_scale_reg)) - && (option_beq Z.eqb x.(mem_offset) y.(mem_offset)))%bool. + && (option_beq Z.eqb x.(mem_offset) y.(mem_offset)) + && (rip_relative_kind_beq x.(rip_relative) y.(rip_relative)))%bool. Global Arguments MEM_beq !_ !_ / . Infix "=?" := MEM_beq : MEM_scope. diff --git a/src/Assembly/Parse.v b/src/Assembly/Parse.v index 9e7a53bc83..c8affb31bd 100644 --- a/src/Assembly/Parse.v +++ b/src/Assembly/Parse.v @@ -80,12 +80,16 @@ Definition parse_label : ParserAction string (fun '(char, ls) => string_of_list_ascii (char :: ls)) (([a-zA-Z] || parse_any_ascii "._?$") ;; (([a-zA-Z] || parse_any_ascii "0123456789_$#@~.?")* )). + Definition parse_non_access_size_label : ParserAction string -:= parse_lookahead_not parse_AccessSize ;;R parse_label. + := parse_lookahead_not parse_AccessSize ;;R parse_label. + +Definition parse_rip_relative_kind : ParserAction rip_relative_kind + := parse_map (fun _ => explicitly_rip_relative) "rip". -Definition parse_MEM : ParserAction MEM +Definition parse_MEM {opts : assembly_program_options} : ParserAction MEM := parse_option_list_map - (fun '(access_size, (constant_location_label, (br (*base reg*), sr (*scale reg, including z *), offset, base_label))) + (fun '(access_size, (constant_location_label, (br (*base reg*), sr (*scale reg, including z *), offset, base_label, rip_relative))) => match base_label, constant_location_label with | Some _, Some _ => (* invalid? *) None | Some _ as lbl, None @@ -96,14 +100,15 @@ Definition parse_MEM : ParserAction MEM ; mem_base_reg := br:option REG ; mem_base_label := lbl ; mem_scale_reg := sr:option (Z * REG) - ; mem_offset := offset:option Z |} + ; mem_offset := offset:option Z + ; rip_relative := rip_relative:rip_relative_kind |} end) (((strip_whitespace_after parse_AccessSize)?) ;; (parse_non_access_size_label?) ;; (parse_option_list_map (fun '(offset, vars) => (vars <-- List.map (fun '(c, (v, e), vs) => match vs, e with [], 1%Z => Some (c, v) | _, _ => None end) vars; - let regs : list (Z * REG) := Option.List.map (fun '(c, v) => match v with inl v => Some (c, v) | inr _ => None end) vars in + let regs : list (Z * (REG + rip_relative_kind)) := Option.List.map (fun '(c, v) => match v with inl v => Some (c, v) | inr _ => None end) vars in let labels : list (Z * string) := Option.List.map (fun '(c, v) => match v with inr v => Some (c, v) | inl _ => None end) vars in base_label <- match labels with | [] => Some None @@ -114,15 +119,17 @@ Definition parse_MEM : ParserAction MEM base_scale_reg <- match regs with | [] => Some (None, None) | [(1%Z, r)] => Some (Some r, None) - | [(s, r)] => Some (None, Some (s, r)) - | [(1%Z, r1); (s, r2)] - | [(s, r2); (1%Z, r1)] + | [(s, inl r)] => Some (None, Some (s, r)) + | [(1%Z, r1); (s, inl r2)] + | [(s, inl r2); (1%Z, r1)] => Some (Some r1, Some (s, r2)) | _ => None end; let '(br, sr) := base_scale_reg in - Some (br (*base reg*), sr (*scale reg, including z *), offset, base_label))%option) - ("[" ;;R parse_Z_poly_strict (sum_beq _ _ REG_beq String.eqb) (parse_or_else_gen (fun x => x) parse_REG parse_label) ;;L "]"))). + let rip_relative := match br with Some (inr k) => k | _ => if default_rel then implicitly_rip_relative else not_rip_relative end in + let br := (br <- br; match br with inl br => Some br | inr _ => None end)%option in + Some (br (*base reg*), sr (*scale reg, including z *), offset, base_label, rip_relative))%option) + ("[" ;;R parse_Z_poly_strict (sum_beq _ _ (sum_beq _ _ REG_beq rip_relative_kind_beq) String.eqb) (parse_or_else_gen (fun x => x) (parse_or_else_gen (fun x => x) parse_REG parse_rip_relative_kind) parse_label) ;;L "]"))). Definition parse_CONST (const_keyword : bool) : ParserAction CONST := if const_keyword @@ -138,7 +145,7 @@ Definition parse_JUMP_LABEL : ParserAction JUMP_LABEL ((strip_whitespace_after "NEAR ")? ;; parse_label). (* we only parse something as a label if it cannot possibly be anything else, because asm is terrible and has ambiguous parses otherwise :-( *) -Definition parse_ARG (const_keyword : bool) : ParserAction ARG +Definition parse_ARG {opts : assembly_program_options} (const_keyword : bool) : ParserAction ARG := parse_or_else (parse_alt_list [parse_map reg parse_REG @@ -165,7 +172,7 @@ Definition parse_OpPrefix : ParserAction OpPrefix := parse_strs parse_OpPrefix_list. (** assumes no leading nor trailing whitespace and no comment *) -Definition parse_RawLine : ParserAction RawLine +Definition parse_RawLine {opts : assembly_program_options} : ParserAction RawLine := fun s => let s := String.trim s in (* get the first space-separated opcode *) @@ -206,7 +213,7 @@ Definition parse_RawLine : ParserAction RawLine parsed_prefix end. -Definition parse_Line (line_num : N) : ParserAction Line +Definition parse_Line {opts : assembly_program_options} (line_num : N) : ParserAction Line := fun s => let '(indentv, rest_linev) := take_while_drop_while Ascii.is_whitespace s in let '(precommentv, commentv) @@ -223,11 +230,21 @@ Definition parse_Line (line_num : N) : ParserAction Line (parse_RawLine rawlinev). (* the error is the unparsable lines *) -Fixpoint parse_Lines' (l : list string) (line_num : N) : ErrorT (list string) Lines +Fixpoint parse_Lines' {opts : assembly_program_options} (l : list string) (line_num : N) : ErrorT (list string) Lines := match l with | [] => Success [] | l :: ls - => match finalize (parse_Line line_num) l, parse_Lines' ls (line_num + 1) with + => let '(result, next_opts) := + match finalize (@parse_Line opts line_num) l with + | None => (None, opts) + | Some result => + (Some result, + match result.(rawline) with + | DEFAULT_REL => {| default_rel := true |} + | _ => opts + end) + end in + match result, @parse_Lines' next_opts ls (line_num + 1) with | None, Error ls => Error (("Line " ++ show line_num ++ ": " ++ l) :: ls) | None, Success _ => Error (("Line " ++ show line_num ++ ": " ++ l) :: nil) | Some _, Error ls => Error ls @@ -235,10 +252,13 @@ Fixpoint parse_Lines' (l : list string) (line_num : N) : ErrorT (list string) Li end end. -Definition parse_Lines (l : list string) : ErrorT (list string) Lines +Definition parse_Lines {opts : assembly_program_options} (l : list string) : ErrorT (list string) Lines := parse_Lines' (String.split_newlines l) 1. -Notation parse := parse_Lines (only parsing). +#[export] Instance default_assembly_program_options : assembly_program_options + := {| default_rel := false |}. + +Notation parse := (@parse_Lines default_assembly_program_options) (only parsing). Global Instance show_lvl_MEM : ShowLevel MEM := fun m @@ -248,11 +268,19 @@ Global Instance show_lvl_MEM : ShowLevel MEM | None => show_lvl end) (fun 'tt - => let reg_part - := (match m.(mem_base_reg), m.(mem_scale_reg) with - | (*"[Reg]" *) Some br, None => show_REG br - | (*"[Reg + Z * Reg]"*) Some br, Some (z, sr) => show_REG br ++ " + " ++ Decimal.show_Z z ++ " * " ++ show_REG sr (*only matching '+' here, because there cannot be a negative scale. *) - | (*"[ Z * Reg]"*) None, Some (z, sr) => Decimal.show_Z z ++ " * " ++ show_REG sr + => let is_explict_rip_relative := match m.(rip_relative) with explicitly_rip_relative => true | _ => false end in + let base_reg_str := + match is_explict_rip_relative, m.(mem_base_reg) with + | false, Some br => Some (show_REG br) + | false, None => None + | true, None => Some "rip" + | true, Some br => Some ("rip + " ++ show_REG br) (* but this should not happen *) + end in + let reg_part + := (match base_reg_str, m.(mem_scale_reg) with + | (*"[Reg]" *) Some br, None => br + | (*"[Reg + Z * Reg]"*) Some br, Some (z, sr) => br ++ " + " ++ Decimal.show_Z z ++ " * " ++ show_REG sr (*only matching '+' here, because there cannot be a negative scale. *) + | (*"[ Z * Reg]"*) None, Some (z, sr) => Decimal.show_Z z ++ " * " ++ show_REG sr | (*"[ ]"*) None, None => "" (* impossible, because only offset is invalid, but we seem to need it for coq because both are option's*) end%Z) in let offset_part @@ -265,8 +293,8 @@ Global Instance show_lvl_MEM : ShowLevel MEM then "0x08 * " ++ Decimal.show_Z (offset / 8) else Hex.show_Z offset) end%Z) in - match m.(mem_base_label), m.(mem_base_reg), m.(mem_offset), m.(mem_scale_reg) with - | Some lbl, Some rip, None, None => lbl ++ "[" ++ reg_part ++ offset_part ++ "]" + match m.(mem_base_label), is_explict_rip_relative, m.(mem_offset), m.(mem_scale_reg) with + | Some lbl, true, None, None => lbl ++ "[" ++ reg_part ++ offset_part ++ "]" | Some lbl, _, _, _ => let l_offset := lbl ++ offset_part in "[" ++ (if reg_part =? "" @@ -575,9 +603,9 @@ Definition get_labeled_data (ls : Lines) : list (string * list (AccessSize * lis let labeled_data := List.filter (fun '(_, data) => match data with nil => false | _ => true end) labeled_data in labeled_data. -Definition parse_assembly_options (ls : Lines) : assembly_program_options +(* Definition parse_assembly_options (ls : Lines) : assembly_program_options := {| default_rel := Option.is_Some (List.find (fun l => match l.(rawline) with | DEFAULT_REL => true | _ => false end) ls) - |}. + |}. *) diff --git a/src/Assembly/Syntax.v b/src/Assembly/Syntax.v index 29638b22a2..4b1bd0391e 100644 --- a/src/Assembly/Syntax.v +++ b/src/Assembly/Syntax.v @@ -53,10 +53,21 @@ Coercion bits_of_AccessSize (x : AccessSize) : N | qword => 64 end. -Record MEM := { mem_bits_access_size : option AccessSize ; mem_base_reg : option REG ; mem_scale_reg : option (Z * REG) ; mem_base_label : option string ; mem_offset : option Z }. +Local Set Boolean Equality Schemes. +Local Set Decidable Equality Schemes. +Variant rip_relative_kind := explicitly_rip_relative | implicitly_rip_relative | not_rip_relative. +Local Unset Boolean Equality Schemes. +Local Unset Decidable Equality Schemes. +Global Coercion bool_of_rip_relative_kind (x : rip_relative_kind) : bool := + match x with + | explicitly_rip_relative => true + | implicitly_rip_relative => true + | not_rip_relative => false + end. +Record MEM := { mem_bits_access_size : option AccessSize ; mem_base_reg : option REG ; mem_scale_reg : option (Z * REG) ; mem_base_label : option string ; mem_offset : option Z ; rip_relative : rip_relative_kind }. Definition mem_of_reg (r : REG) : MEM := - {| mem_base_reg := Some r ; mem_offset := None ; mem_scale_reg := None ; mem_bits_access_size := None ; mem_base_label := None |}. + {| mem_base_reg := Some r ; mem_offset := None ; mem_scale_reg := None ; mem_bits_access_size := None ; mem_base_label := None ; rip_relative := not_rip_relative |}. Inductive FLAG := CF | PF | AF | ZF | SF | OF.