Skip to content

Commit 3653372

Browse files
committed
0.5.1 - Fix 'switch' statement decompilation. Start work on 'if' statements.
1 parent 2497bc4 commit 3653372

14 files changed

+488
-173
lines changed

CHANGELOG.md

+13-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,19 @@ This project does its best to adhere to [Semantic Versioning](http://semver.org/
44

55

66
--------
7-
### [0.5.0](N/A) - 2020-12-05
7+
### [0.5.1](N/A) - 2020-12-05
8+
__Fix `switch` statements to decompile much more accurately based on code flow analysis. Start work on `if` statements.__
9+
#### Added
10+
* new `Indent` class to handle `SourceWriter` indentation
11+
* `Switch` to handle switch code flow initialization and info
12+
13+
#### Changed
14+
* Add code flow analysis initialization and info to `JumpConditionInfo`
15+
* Assume that a forward goto right before a condition is an `else` statement
16+
17+
18+
--------
19+
### [0.5.0](https://github.com/TeamworkGuy2/ClassLoading/commit/2497bc4caaa27e6574afad64cace3475238da9f2) - 2020-12-05
820
__Decompilation to source code in-progress and first round trip compile/decompile unit tests__
921
#### Added
1022
* A new `twg2.jbcm.ir` package with helper classes for tracking state and data related to decompilation

README.md

+8-7
Original file line numberDiff line numberDiff line change
@@ -9,22 +9,23 @@ See the `twg2.jbcm.main.UsageCliMain` class for a simple command line interface
99

1010
Reference: [Java Virtual Machine Spec (Java 9)](https://docs.oracle.com/javase/specs/jvms/se9/html/index.html)
1111

12-
### `twg2.jbcm.classFormat`
12+
Packages:
13+
### twg2.jbcm.classFormat
1314
Contains implementation of the [class file format](https://docs.oracle.com/javase/specs/jvms/se9/html/jvms-4.html)
1415
with related attributes (`twg2.jbcm.classFormat.attributes`) and constant pool types (`twg2.jbcm.classFormat.constantPool`).
1516

16-
### `twg2.jbcm` and `twg2.jbcm.modify`
17+
### twg2.jbcm & twg2.jbcm.modify
1718
Interfaces and utilities for searching and modifying class files.
1819

19-
### `twg2.jbcm`
20+
### twg2.jbcm
2021
Utilities and the `Opcodes` enum containing detailed, programmatic information about the [Java instruction set opcodes](https://docs.oracle.com/javase/specs/jvms/se9/html/jvms-6.html#jvms-6.5).
21-
Also see the [extract-opcodes.js] file for how the enum literals in `Opcodes` are generated.
22+
Also see [extract-opcodes.js](extract-opcodes.js) file for how the enum literals in `Opcodes` are generated.
2223

23-
### `twg2.jbcm.dynamicModification` and `twg2.jbcm.parserExamples`
24+
### twg2.jbcm.dynamicModification & twg2.jbcm.parserExamples
2425
Classes used by the example and test packages.
2526

26-
### `twg2.jbcm.runtimeLoading`
27+
### twg2.jbcm.runtimeLoading
2728
Runtime class loading.
2829

29-
### `twg2.jbcm.main`
30+
### twg2.jbcm.main
3031
Example console apps.

package-lib.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"version" : "0.5.0",
2+
"version" : "0.5.1",
33
"name" : "class-loading",
44
"description" : "Java class file parsing, manipulation, and to human readable representation",
55
"homepage" : "https://github.com/TeamworkGuy2/ClassLoading",

src/twg2/jbcm/CodeFlow.java

+14
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ public static IntArrayList getFlowPaths(int idx, byte[] instr, IntArrayList dstP
3232
dstPath.add(~i);
3333
}
3434
int jumpDst = opc.getJumpDestination(instr, i);
35+
if(jumpDst < 0) {
36+
jumpDst = opc.getJumpDestination(instr, i);
37+
}
3538
getFlowPaths(jumpDst, instr, dstPath);
3639

3740
// end this code path if the jump path is unconditional (i.e. GOTO or JSR)
@@ -52,6 +55,17 @@ else if(opc.hasBehavior(Type.RETURN) || opc == Opcodes.ATHROW) {
5255
}
5356

5457

58+
public static int maxIndex(IntListReadOnly codeFlow) {
59+
int max = -1;
60+
for(int i = 0, size = codeFlow.size(); i < size; i++) {
61+
int index = codeFlow.get(i);
62+
max = Math.max(index < 0 ? ~index : index, max);
63+
}
64+
65+
return max;
66+
}
67+
68+
5569
public static String flowPathToString(byte[] instr, IntListReadOnly codeFlow) {
5670
var sb = new StringBuilder();
5771
for(int i = 0, size = codeFlow.size(); i < size; i++) {

src/twg2/jbcm/IoUtility.java

+6-3
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ public static void writeShort(short value, byte[] b, int offset) {
6363
* a long by {@code (b[offset] << 56) | (b[offset+1] << 48) | (b[offset+2] << 40) | (b[offset+3] << 32) | ...}
6464
*/
6565
public static long readLong(byte[] b, int offset) {
66-
return ((long)b[offset] << 56) |
66+
return ((long)(b[offset] & 0xFF) << 56) |
6767
((long)(b[offset+1] & 0xFF) << 48) |
6868
((long)(b[offset+2] & 0xFF) << 40) |
6969
((long)(b[offset+3] & 0xFF) << 32) |
@@ -81,7 +81,10 @@ public static long readLong(byte[] b, int offset) {
8181
* an integer by {@code (b[offset] << 24) | (b[offset+1] << 16) | (b[offset+2] << 8) | b[offset+3]}
8282
*/
8383
public static int readInt(byte[] b, int offset) {
84-
return (b[offset] << 24) | (b[offset+1] << 16) | (b[offset+2] << 8) | b[offset+3];
84+
return ((b[offset] & 0xFF) << 24) |
85+
((b[offset+1] & 0xFF) << 16) |
86+
((b[offset+2] & 0xFF) << 8) |
87+
(b[offset+3] & 0xFF);
8588
}
8689

8790

@@ -92,7 +95,7 @@ public static int readInt(byte[] b, int offset) {
9295
* a short by {@code (b[offset] << 8) | b[offset+1]}
9396
*/
9497
public static short readShort(byte[] b, int offset) {
95-
return (short)((b[offset] << 8) | b[offset+1]);
98+
return (short)(((b[offset] & 0xFF) << 8) | (b[offset+1] & 0xFF));
9699
}
97100

98101

src/twg2/jbcm/ir/JumpConditionInfo.java

+65-6
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
package twg2.jbcm.ir;
22

3+
import java.util.List;
4+
import java.util.concurrent.atomic.AtomicReference;
5+
6+
import twg2.collections.primitiveCollections.IntArrayList;
7+
import twg2.jbcm.CodeFlow;
38
import twg2.jbcm.Opcodes;
49

510
/** Contains the beginning and end opcodes and targets for a goto/if condition pair
@@ -9,14 +14,18 @@
914
public class JumpConditionInfo {
1015
private final Opcodes opc;
1116
private final int opcIdx;
12-
private final int targetOffsetIdx;
17+
private final int targetOffset;
18+
private final IntArrayList codeFlow;
19+
public final int codeFlowMaxIndex;
1320
private boolean finished;
1421

1522

16-
public JumpConditionInfo(Opcodes opc, int opcIdx, int targetOffsetIdx) {
23+
public JumpConditionInfo(Opcodes opc, int opcIdx, int targetOffset, int codeFlowMaxIndex, IntArrayList codeFlow) {
1724
this.opc = opc;
1825
this.opcIdx = opcIdx;
19-
this.targetOffsetIdx = targetOffsetIdx;
26+
this.targetOffset = targetOffset;
27+
this.codeFlowMaxIndex = codeFlowMaxIndex;
28+
this.codeFlow = codeFlow;
2029
}
2130

2231

@@ -30,13 +39,13 @@ public int getOpcodeIndex() {
3039
}
3140

3241

33-
public int getTargetOffset() {
34-
return targetOffsetIdx;
42+
public IntArrayList getCodeFlow() {
43+
return codeFlow;
3544
}
3645

3746

3847
public int getTargetIndex() {
39-
return opcIdx + targetOffsetIdx;
48+
return opcIdx + targetOffset;
4049
}
4150

4251

@@ -49,4 +58,54 @@ public boolean isFinished() {
4958
return finished;
5059
}
5160

61+
62+
/** Analyze a jump condition and return helpful information about its bytecode layout.
63+
* Used by {@link #loadTableSwitch(int, byte[], List, AtomicReference)} and {@link #loadLookupSwitch(int, byte[], List, AtomicReference)}
64+
* @param opc the jump/condition opcode
* @param idx the {@code instr} index of the jump/condition opcode, where the code flow analysis starts
65+
* @param targetOffset the jump offset relative to {@code idx} (the jump target index is {@code idx + targetOffset})
66+
* @param instr the method bytecode array
67+
* @return the analyzed jump condition information
68+
*/
69+
public static JumpConditionInfo loadConditionFlow(Opcodes opc, int idx, int targetOffset, byte[] instr) {
70+
// analyze code flow path
71+
var condFlowPath = new IntArrayList();
72+
condFlowPath.add(~idx);
73+
CodeFlow.getFlowPaths(idx, instr, condFlowPath);
74+
75+
// potential end index (probably redundant once code flow is working)
76+
var maxCodeFlowIndex = CodeFlow.maxIndex(condFlowPath);
77+
78+
return new JumpConditionInfo(opc, idx, targetOffset, maxCodeFlowIndex, condFlowPath);
79+
}
80+
81+
82+
public static int findLoopStart(int curIdx, int jumpRelative, List<JumpConditionInfo> loops) {
83+
// Loops are generally compiled using a GOTO and an IF_* instruction
84+
// form 1: [..., GOTO <setup_if[0]>, instructions[], setup_if[], IF_* <instructions[0]>, ...]
85+
if(jumpRelative < 0) {
86+
var jumpToIdx = curIdx + jumpRelative - 3; // GOTO has a 2 byte operand so -3 is the GOTO instruction index right before the jump destination (which is the first instruction in a loop)
87+
for(int i = loops.size() - 1; i >= 0; i--) {
88+
var cond = loops.get(i);
89+
if(cond.getOpcodeIndex() == jumpToIdx) {
90+
return i;
91+
}
92+
}
93+
}
94+
return -1;
95+
}
96+
97+
98+
public static int findLoopEnd(int curIdx, int numOperands, int jumpRelative, List<JumpConditionInfo> loops) {
99+
if(jumpRelative > 0) {
100+
var instAfterJumpIdx = curIdx + numOperands + 1;
101+
for(int i = loops.size() - 1; i >= 0; i--) {
102+
var cond = loops.get(i);
103+
if(cond.getTargetIndex() == instAfterJumpIdx) {
104+
return i;
105+
}
106+
}
107+
}
108+
return -1;
109+
}
110+
52111
}

src/twg2/jbcm/ir/Switch.java

+75
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
package twg2.jbcm.ir;
2+
3+
import java.util.ArrayList;
4+
import java.util.List;
5+
import java.util.concurrent.atomic.AtomicReference;
6+
7+
import twg2.jbcm.toSource.SwitchFlow;
8+
9+
/**
10+
* @author TeamworkGuy2
11+
* @since 2020-12-05
12+
*/
13+
public class Switch {
14+
public List<SwitchCase> switchCases;
15+
protected List<SwitchCase> finishedCases;
16+
public SwitchCase switchDefault;
17+
public int switchInstSize;
18+
public int switchEndIdx;
19+
/** whether all the cases in this switch return/throw before the next case starts (see {@link SwitchFlow#isSwitchSimplePacked(List, SwitchCase, byte[])}) */
20+
public boolean isReturnPacked;
21+
22+
23+
public Switch(List<SwitchCase> switchCases, SwitchCase switchDefault) {
24+
this.switchCases = switchCases;
25+
this.switchDefault = switchDefault;
26+
this.finishedCases = new ArrayList<SwitchCase>();
27+
}
28+
29+
30+
public void finish(SwitchCase switchCase) {
31+
switchCase.finish();
32+
finishedCases.add(switchCase);
33+
}
34+
35+
36+
public boolean isFinished() {
37+
return finishedCases.size() == switchCases.size() + 1; // + 1 for default case
38+
}
39+
40+
41+
public static Switch loadTableSwitch(int i, byte[] instr) {
42+
var dstCases = new ArrayList<SwitchCase>();
43+
var dstSwitchDefault = new AtomicReference<SwitchCase>();
44+
int newI = SwitchFlow.loadTableSwitch(i, instr, dstCases, dstSwitchDefault);
45+
var inst = new Switch(dstCases, dstSwitchDefault.get());
46+
int endIdx = SwitchFlow.commonSwitchEndIndex(inst.switchCases, inst.switchDefault, instr);
47+
if(endIdx == -1) {
48+
endIdx = SwitchFlow.maxSwitchCodeFlowIndex(inst.switchCases, inst.switchDefault, instr);
49+
if(endIdx > -1) {
50+
inst.isReturnPacked = true;
51+
}
52+
}
53+
inst.switchEndIdx = endIdx;
54+
inst.switchInstSize = newI;
55+
return inst;
56+
}
57+
58+
59+
public static Switch loadLookupSwitch(int i, byte[] instr) {
60+
var dstCases = new ArrayList<SwitchCase>();
61+
var dstSwitchDefault = new AtomicReference<SwitchCase>();
62+
int newI = SwitchFlow.loadLookupSwitch(i, instr, dstCases, dstSwitchDefault);
63+
var inst = new Switch(dstCases, dstSwitchDefault.get());
64+
int endIdx = SwitchFlow.commonSwitchEndIndex(inst.switchCases, inst.switchDefault, instr);
65+
if(endIdx == -1) {
66+
endIdx = SwitchFlow.maxSwitchCodeFlowIndex(inst.switchCases, inst.switchDefault, instr);
67+
if(endIdx > -1) {
68+
inst.isReturnPacked = true;
69+
}
70+
}
71+
inst.switchEndIdx = endIdx;
72+
inst.switchInstSize = newI;
73+
return inst;
74+
}
75+
}

src/twg2/jbcm/ir/SwitchCase.java

+8-26
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import java.util.Comparator;
44

5-
import twg2.collections.primitiveCollections.IntList;
5+
import twg2.collections.primitiveCollections.IntArrayList;
66
import twg2.jbcm.CodeFlow;
77

88
/**
@@ -17,49 +17,31 @@ public class SwitchCase {
1717
}
1818
};
1919

20-
public static Comparator<SwitchCase> CASE_INDEX_COMPARATOR = new Comparator<SwitchCase>() {
20+
public static Comparator<SwitchCase> CASE_TARGET_INDEX_COMPARATOR = new Comparator<SwitchCase>() {
2121
@Override public int compare(SwitchCase o1, SwitchCase o2) {
2222
return o1.caseTarget - o2.caseTarget;
2323
}
2424
};
2525

2626
public final int caseMatch;
2727
public final int caseTarget;
28-
public final int caseEndIdx;
29-
public final int caseEndTarget;
30-
public final boolean hasEndTarget;
31-
private final IntList codeFlow;
28+
private final IntArrayList codeFlow;
29+
public final int codeFlowMaxIndex;
3230
private boolean finished;
3331

34-
public SwitchCase(int caseMatch, int caseTarget, int caseEndIdx, IntList codeFlow) {
35-
this.caseMatch = caseMatch;
36-
this.caseTarget = caseTarget;
37-
this.caseEndIdx = caseEndIdx;
38-
this.caseEndTarget = 0;
39-
this.hasEndTarget = false;
40-
this.codeFlow = codeFlow;
41-
}
4232

43-
44-
public SwitchCase(int caseMatch, int caseTarget, int caseEndIdx, int caseEndTarget, IntList codeFlow) {
33+
public SwitchCase(int caseMatch, int caseTarget, int codeFlowMaxIndex, IntArrayList codeFlow) {
4534
this.caseMatch = caseMatch;
4635
this.caseTarget = caseTarget;
47-
this.caseEndIdx = caseEndIdx;
48-
this.caseEndTarget = caseEndTarget;
49-
this.hasEndTarget = true;
36+
this.codeFlowMaxIndex = codeFlowMaxIndex;
5037
this.codeFlow = codeFlow;
5138
}
5239

5340

54-
public boolean contains(int idx) {
55-
return idx >= caseTarget && idx <= caseEndIdx;
56-
}
57-
58-
5941
/** The {@link CodeFlow} for this switch case statement, starting from the case target, tracing all non-circular paths within the method's code
6042
* @return the code flow list for this switch case
6143
*/
62-
public IntList getCodeFlow() {
44+
public IntArrayList getCodeFlow() {
6345
return codeFlow;
6446
}
6547

@@ -76,6 +58,6 @@ public boolean isFinished() {
7658

7759
@Override
7860
public String toString() {
79-
return "case " + this.caseMatch + ": [" + this.caseTarget + ", " + this.caseEndIdx + (this.hasEndTarget ? "] -> " + this.caseEndTarget : "]");
61+
return "case " + this.caseMatch + ": [" + this.caseTarget + ", " + this.codeFlow + "]";
8062
}
8163
}

0 commit comments

Comments
 (0)