1
1
package twg2 .jbcm ;
2
2
3
+ import java .util .ArrayList ;
4
+ import java .util .Collections ;
5
+ import java .util .List ;
6
+ import java .util .concurrent .atomic .AtomicInteger ;
7
+
3
8
import twg2 .collections .primitiveCollections .IntArrayList ;
4
9
import twg2 .collections .primitiveCollections .IntListReadOnly ;
5
10
import twg2 .jbcm .Opcodes .Type ;
11
+ import twg2 .jbcm .ir .JumpConditionInfo ;
12
+ import twg2 .jbcm .ir .JumpConditionInfo .UsageHint ;
6
13
7
14
/** Trace all possible paths through the code in a method. A code flow follows jump, branch/condition, return, and throw instructions.
8
15
* Circular paths end at the first jump/branch destination which already exists in the code flow.
9
16
* @author TeamworkGuy2
10
17
* @since 2020-12-03
11
18
*/
12
19
public class CodeFlow {
20
+ /** The size of a GOTO instruction, 1 byte opcode + 2 byte operand */
21
+ public static final int GOTO_SIZE = 3 ;
22
+
13
23
14
24
/** Starting at a given point in a bytecode array, follow code jumps and branches to all termination (return/throw) points potentially reachable from the starting point
15
25
* @param idx the starting point
@@ -20,22 +30,32 @@ public class CodeFlow {
20
30
* and can easily be converted back by negating them again. This differentiates non-terminal indexes from all
21
31
* valid terminal indexes because valid code indexes cannot be less than 0.
22
32
*/
23
- public static IntArrayList getFlowPaths (int idx , byte [] instr , IntArrayList dstPath ) {
24
- for (int i = idx , size = instr .length ; i < size ; i ++) {
25
- Opcodes opc = Opcodes .get (instr [i ] & 0xFF );
33
+ public static IntArrayList getFlowPaths (byte [] code , int idx ) {
34
+ var dstPath = new IntArrayList ();
35
+ getFlowPaths (code , idx , code .length , dstPath , 0 );
36
+ return dstPath ;
37
+ }
38
+
39
+
40
+ public static int getFlowPaths (byte [] code , int idx , int max , IntArrayList dstPath , int pathJumps ) {
41
+ for (int i = idx ; i < max ; i ++) {
42
+ Opcodes opc = Opcodes .get (code [i ]);
26
43
int numOperands = opc .getOperandCount ();
27
44
28
45
// Type.JUMP instruction set includes all Type.CONDITION instructions
29
46
if (opc .hasBehavior (Type .JUMP )) {
30
- // follow the jump path if it has not already been followed (to avoid loops)
31
- if (! dstPath .contains (~i )) {
32
- dstPath . add (~ i ) ;
47
+ // skip the jump path if it has already been followed and this is the beginning (to avoid loops)
48
+ if (dstPath .contains (~i ) && pathJumps == 0 ) {
49
+ break ;
33
50
}
34
- int jumpDst = opc .getJumpDestination (instr , i );
51
+ dstPath .add (~i );
52
+ pathJumps ++;
53
+ int jumpDst = opc .getJumpDestination (code , i );
35
54
if (jumpDst < 0 ) {
36
- jumpDst = opc .getJumpDestination (instr , i );
55
+ jumpDst = opc .getJumpDestination (code , i );
37
56
}
38
- getFlowPaths (jumpDst , instr , dstPath );
57
+ int subPathJumps = getFlowPaths (code , jumpDst , max , dstPath , pathJumps );
58
+ pathJumps = subPathJumps ;
39
59
40
60
// end this code path if the jump path is unconditional (i.e. GOTO or JSR)
41
61
if (!opc .hasBehavior (Type .CONDITION )) {
@@ -45,13 +65,224 @@ public static IntArrayList getFlowPaths(int idx, byte[] instr, IntArrayList dstP
45
65
// end this code flow path once a terminal instruction is reached
46
66
else if (opc .hasBehavior (Type .RETURN ) || opc == Opcodes .ATHROW ) {
47
67
dstPath .add (i );
68
+ pathJumps = 0 ;
48
69
break ;
49
70
}
50
71
51
72
i += (numOperands < 0 ? 0 : numOperands );
52
73
}
53
74
54
- return dstPath ;
75
+ return pathJumps ;
76
+ }
77
+
78
+
79
+ /**
80
+ * @param code the code array
81
+ * @param offset the offset into the code array at which to start finding instructions
82
+ * @param length the number of bytes of the code array to check through
83
+ * @return
84
+ */
85
+ public static List <JumpConditionInfo > findFlowConditions (byte [] code , int offset , int length ) {
86
+ var conditions = new ArrayList <JumpConditionInfo >(); // track GOTO/IF_* loops detected in the code
87
+
88
+ // BYTECODE LOOP:
89
+ for (int i = offset , size = offset + length ; i < size ; i ++) {
90
+ Opcodes opc = Opcodes .get (code [i ]);
91
+ int numOperands = opc .getOperandCount ();
92
+ // Special handling for instructions with unpredictable byte code lengths
93
+ if (numOperands == Opcodes .Const .UNPREDICTABLE ) {
94
+ if (Opcodes .WIDE .is (code [i ])) {
95
+ i ++; // WIDE opcodes are nested around other operations
96
+ opc = Opcodes .get (code [i ]);
97
+ numOperands = opc .getOperandCount () * 2 ; // WIDE opcodes double the operands of the widened opcode
98
+ }
99
+ else if (Opcodes .TABLESWITCH .is (code [i ])) {
100
+ throw new IllegalStateException ("tableswitch code handling not implemented" );
101
+ }
102
+ else if (Opcodes .LOOKUPSWITCH .is (code [i ])) {
103
+ throw new IllegalStateException ("lookupswitch code handling not implemented" );
104
+ }
105
+ }
106
+ int jumpRelative = CodeUtility .loadOperands (numOperands , code , i );
107
+
108
+ // form 1: [..., GOTO <setup_if[0]>, instructions[], setup_if[], IF_* <instructions[0]>, ...] - for()/while() forward GOTO, condition after loop with backward jump
109
+ // form 2: [..., setup_if[], IF_* <after[0]>, instructions[], GOTO <setup_if[0]>, after[], ...] - for()/while() condition before loop with forward jump, backward GOTO
110
+ // form 3: [..., instructions[], setup_if[], IF_* <instructions[0]>, after[], ...] - do{}while() condition after loop with backward jump
111
+ var isJump = opc .hasBehavior (Opcodes .Type .JUMP );
112
+ // backward jump, required for a loop (thought experiment: create a loop, using Java bytecodes, that does not jump backward)
113
+ // although a code obfuscator could re-arrange code and include backward jumps so not all backward jumps are loops
114
+ if (isJump && jumpRelative < 0 ) {
115
+ conditions .add (JumpConditionInfo .loadConditionFlow (opc , i , jumpRelative , code , UsageHint .FOR_OR_WHILE_LOOP ));
116
+ // 'for' or 'while' loop has to evaluate the condition first so it needs an IF or GOTO at the beginning
117
+ // 'do-while' loop evaluates condition after loop runs once, only compiled form seen so far is: no GOTO and one backward jump at the end
118
+ }
119
+ else if (opc .hasBehavior (Opcodes .Type .CONDITION )) {
120
+ conditions .add (JumpConditionInfo .loadConditionFlow (opc , i , jumpRelative , code , UsageHint .IF ));
121
+ }
122
+ i += (numOperands < 0 ) ? 0 : numOperands ;
123
+ }
124
+
125
+ Collections .sort (conditions , JumpConditionInfo .LOWER_INDEX_SORTER );
126
+
127
+ // post processing - convert special cases
128
+ for (int i = 0 , size = conditions .size (); i < size ; i ++) {
129
+ var loop = conditions .get (i );
130
+ // find and convert if-conditions that may have been miss-identified as loops
131
+ // case: an if-statement inside a loop where there are no instructions after the if-statement and before the
132
+ // end of the loop may be compiled as a condition with a backward jump and thus look like a loop, we can tell
133
+ // in the case when it shares the same jump destination as the closest parent loop that contains it
134
+ // form: [..., loop_start, instructions[], setup_if[], IF_* <loop_start>, instructions_in_if[], loop_end, ...]
135
+ if (loop .targetOffset < 0 ) {
136
+ var targetIndex = loop .getTargetIndex ();
137
+ var loopUpperIndex = loop .getUpperIndex ();
138
+ // look at conditions beyond the current one since they are later in the code or contained within the
139
+ // current loop and a nested if-statement is contained within the nearest parent loop
140
+ for (int j = i + 1 ; j < size ; j ++) {
141
+ var loopJ = conditions .get (j );
142
+ if (loopJ .opcIdx > loopUpperIndex ) {
143
+ break ; // skip remaining conditions once we're past beyond the bounds of the current one
144
+ }
145
+ if (loopJ .targetOffset < 0 && targetIndex == loopJ .getTargetIndex () && containsIndex (loop , loopJ .opcIdx )) {
146
+ // TODO debugging
147
+ System .out .println ("converted loop to nested IF-within-loop at " + loopJ .opcIdx + " (" + loopJ .opc + ") contained in " + loop + " to " + targetIndex );
148
+
149
+ conditions .set (j , loopJ .withLoopEndIndexForIf (loopUpperIndex ));
150
+ }
151
+ }
152
+ }
153
+
154
+ // set the potential-if-index of loops
155
+ if (UsageHint .isLoop (loop .usageHint ) && loop .potentialIfIndex < 0 ) {
156
+ var loopConditionIdx = findFirstIfConditionPointingToEndOf (conditions , i );
157
+
158
+ if (loopConditionIdx >= 0 ) {
159
+ loop = loop .withPotentialIfIndex (conditions .get (loopConditionIdx ).opcIdx );
160
+ conditions .set (i , loop );
161
+
162
+ // TODO debugging
163
+ System .out .println ("converted if index for loop: " + loop + " found IF " + (loopConditionIdx >= 0 ? conditions .get (loopConditionIdx ) : "-1" ));
164
+
165
+ conditions .remove (loopConditionIdx );
166
+ size --;
167
+ if (loopConditionIdx <= i ) {
168
+ i --;
169
+ }
170
+ }
171
+ }
172
+ }
173
+
174
+ return conditions ;
175
+ }
176
+
177
+
178
+ /** Find the first IF* condition that is contained within the condition located at {@code startIdx} in the {@code conditions} list.
179
+ * @param conditions list of conditions, should include all IF* and GOTO instructions in the code,
180
+ * sorted based on {@link JumpConditionInfo#getLowerIndex()}
181
+ * @param startIdx the index into the {@code conditions} list of the condition to find an IF* condition within
182
+ * @return the {@code conditions} index of the first matching IF* condition, else -1 if none is found
183
+ */
184
+ public static int findFirstIfConditionPointingToEndOf (List <JumpConditionInfo > conditions , int startIdx ) {
185
+ var withinThis = conditions .get (startIdx );
186
+ int maxIdx = withinThis .getUpperIndex ();
187
+ int lowestOpcIdxFound = Integer .MAX_VALUE ;
188
+ int lowestOpcIdxI = -1 ;
189
+
190
+ for (int i = startIdx + 1 , size = conditions .size (); i < size ; i ++) {
191
+ var cond = conditions .get (i );
192
+ // stop once the condition isn't contained within the target condition, we can safely break because the loops are sorted by lower bound index
193
+ if (cond .getLowerIndex () > maxIdx ) {
194
+ break ;
195
+ }
196
+ if (cond != withinThis && cond .opcIdx < lowestOpcIdxFound && containsIfAndEndsWith (withinThis , cond )) {
197
+ lowestOpcIdxFound = cond .opcIdx ;
198
+ lowestOpcIdxI = i ;
199
+ }
200
+ }
201
+ return lowestOpcIdxI ;
202
+ }
203
+
204
+
205
+ public static boolean containsJumpTo (byte [] code , int offset , int length , int targetIndex ) {
206
+ // BYTECODE LOOP:
207
+ for (int i = offset , size = offset + length ; i < size ; i ++) {
208
+ Opcodes opc = Opcodes .get (code [i ]);
209
+ int numOperands = opc .getOperandCount ();
210
+ // Special handling for instructions with unpredictable byte code lengths
211
+ if (numOperands == Opcodes .Const .UNPREDICTABLE ) {
212
+ if (Opcodes .WIDE .is (code [i ])) {
213
+ i ++; // WIDE opcodes are nested around other operations
214
+ opc = Opcodes .get (code [i ]);
215
+ numOperands = opc .getOperandCount () * 2 ; // WIDE opcodes double the operands of the widened opcode
216
+ }
217
+ else if (Opcodes .TABLESWITCH .is (code [i ])) {
218
+ throw new IllegalStateException ("tableswitch code handling not implemented" );
219
+ }
220
+ else if (Opcodes .LOOKUPSWITCH .is (code [i ])) {
221
+ throw new IllegalStateException ("lookupswitch code handling not implemented" );
222
+ }
223
+ }
224
+ if (opc .hasBehavior (Opcodes .Type .JUMP )) {
225
+ int jumpRelative = CodeUtility .loadOperands (numOperands , code , i );
226
+ if (i + jumpRelative == targetIndex ) {
227
+ return true ;
228
+ }
229
+ }
230
+
231
+ i += (numOperands < 0 ) ? 0 : numOperands ;
232
+ }
233
+ return false ;
234
+ }
235
+
236
+
237
+ public static int findLastOpcodeIndex (byte [] instr , int start , int end ) {
238
+ AtomicInteger lastIdx = new AtomicInteger (-1 );
239
+ CodeUtility .forEach (instr , start , end - start , (opc , instrs , idx ) -> {
240
+ lastIdx .set (idx );
241
+ });
242
+ return lastIdx .get ();
243
+ }
244
+
245
+
246
+ public static int findContainsIfIndex (List <JumpConditionInfo > loops , int index ) {
247
+ for (int i = 0 , size = loops .size (); i < size ; i ++) {
248
+ if (loops .get (i ).potentialIfIndex == index ) {
249
+ return i ;
250
+ }
251
+ }
252
+ return -1 ;
253
+ }
254
+
255
+
256
+ public static int findOpcIndex (List <JumpConditionInfo > loops , int index ) {
257
+ for (int i = 0 , size = loops .size (); i < size ; i ++) {
258
+ if (loops .get (i ).opcIdx == index ) {
259
+ return i ;
260
+ }
261
+ }
262
+ return -1 ;
263
+ }
264
+
265
+
266
+ public static boolean containsIndex (JumpConditionInfo cond , int index ) {
267
+ var condTarget = cond .opcIdx + cond .targetOffset ;
268
+ // avoid branch logic (ternary statements such as Math.min/max)
269
+ return (index >= cond .opcIdx && index <= condTarget ) || (index >= condTarget && index <= cond .opcIdx );
270
+ }
271
+
272
+
273
+ /**
274
+ * Check that an {@code ifCond}'s lower bound (generally its opcode index) is within a loop condition's
275
+ * bounds and that the {@code ifCond}'s upper bound (generally its target index) is the instruction immediately after
276
+ * the loop end instruction.
277
+ * ASSUMPTION: the {@code loopCond}'s opcode index is its upper bound (i.e. the loop ends with a backward jump instruction)
278
+ * @param loopCond the loop condition
279
+ * @param ifCond the other condition, could be a loop or if
280
+ * @return true if the conditions described above hold, false if not
281
+ */
282
+ public static boolean containsIfAndEndsWith (JumpConditionInfo loopCond , JumpConditionInfo ifCond ) {
283
+ return loopCond .getTargetIndex () <= ifCond .getLowerIndex () &&
284
+ // require the match to be a condition that jumps to the instruction after the loop
285
+ loopCond .getOpcodeIndex () + loopCond .opc .getOperandCount () + 1 == ifCond .getUpperIndex ();
55
286
}
56
287
57
288
@@ -66,18 +297,18 @@ public static int maxIndex(IntListReadOnly codeFlow) {
66
297
}
67
298
68
299
69
- public static String flowPathToString (byte [] instr , IntListReadOnly codeFlow ) {
300
+ public static String flowPathToString (byte [] code , IntListReadOnly codeFlow ) {
70
301
var sb = new StringBuilder ();
71
302
for (int i = 0 , size = codeFlow .size (); i < size ; i ++) {
72
303
var idx = codeFlow .get (i );
73
304
// a conditional/jump point
74
305
if (idx < 0 ) {
75
- var opc = Opcodes .get (instr [~idx ] & 0xFF );
306
+ var opc = Opcodes .get (code [~idx ]);
76
307
sb .append (~idx ).append (' ' ).append (opc ).append (" -> " );
77
308
}
78
309
// a terminal point
79
310
else {
80
- var opc = Opcodes .get (instr [idx ] & 0xFF );
311
+ var opc = Opcodes .get (code [idx ]);
81
312
sb .append (idx ).append (' ' ).append (opc ).append ("], " );
82
313
}
83
314
}
0 commit comments