Skip to content

Commit 8be23bc

Browse files
committed
adding malloc wrap and addr attr to mem instrument
1 parent 96d107a commit 8be23bc

12 files changed

+782
-1
lines changed

CMakeLists.txt

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Build script for the DynamoRIO client library "myclient".
cmake_minimum_required(VERSION 3.7)
project(sample)

add_library(myclient SHARED memtrace.c error_detector.c)
# "${include/}" dereferenced an undefined variable and expanded to nothing;
# point at the real header directory next to this file instead.
target_include_directories(myclient PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include)

find_package(DynamoRIO PATHS ../DynamoRIO/cmake)
if (NOT DynamoRIO_FOUND)
  message(FATAL_ERROR "DynamoRIO package required to build")
endif(NOT DynamoRIO_FOUND)

configure_DynamoRIO_client(myclient)
use_DynamoRIO_extension(myclient "drmgr")
use_DynamoRIO_extension(myclient "drreg")
use_DynamoRIO_extension(myclient "drutil")
use_DynamoRIO_extension(myclient "drx")
use_DynamoRIO_extension(myclient "droption")
use_DynamoRIO_extension(myclient "drsyms")
use_DynamoRIO_extension(myclient "drcallstack")
use_DynamoRIO_extension(myclient "drwrap")

README.md

+4-1
Original file line numberDiff line numberDiff line change
@@ -1 +1,4 @@
1-
# BasicThreadErrorDetector
1+
# Basic Thread Error Detector based on DynamoRIO
2+
3+
4+
https://storage.googleapis.com/pub-tools-public-publication-data/pdf/35604.pdf

build.sh

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Builds the sample target program and the DynamoRIO client library.
# Abort on the first failing step so cmake/make never run in the wrong directory.
set -e

clang testPrograms/basicMultiThread.c -o basicMultiThread.elf -lpthread

# -p / -f make the script re-runnable: the directory may already exist and
# the library may not exist yet.
mkdir -p build
rm -f build/libmyclient.so

cd build
cmake ../
make
cd ../

buildAndRun.sh

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Build everything, then run the sample program under the DynamoRIO client.
# Do not run a stale or missing client library when the build failed.
bash build.sh || exit 1

# "-lpthread" is a linker flag; after "--" it was being handed to the traced
# program as a command-line argument, so it is dropped here.
../DynamoRIO/bin64/drrun -c build/libmyclient.so -- ./basicMultiThread.elf

error_detector.c

+213
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
#include "include/memtrace.h"
2+
3+
#define MAX_THREADS 100
4+
const u64 linear_set_size_increment = 1000000;
5+
6+
// todo => shrink to 8 bytes. Should be possible if memory address access/accessing only store
7+
// bytes of the virtual address that define the memory locations relative to the process pages)
8+
// reference: https://developer.arm.com/documentation/den0024/a/The-Memory-Management-Unit/Translating-a-Virtual-Address-to-a-Physical-Address
9+
// abstract: only the last 28 bits of a virtual address encode the actual location (or index) of the address; the rest is context
10+
typedef struct MemoryAccess {
11+
usize address_accessed;
12+
u16 opcode;
13+
u64 size;
14+
u32 callee_thread_id;
15+
} MemoryAccess;
16+
17+
typedef struct LockAccess {
18+
usize lock_address;
19+
u32 callee_thread_id;
20+
} LockAccess;
21+
22+
typedef struct ThreadState {
23+
u64 thread_id;
24+
MemoryAccess *mem_read_set;
25+
u64 mem_read_set_capacity;
26+
u64 mem_read_set_len;
27+
28+
MemoryAccess *mem_write_set;
29+
u64 mem_write_set_capacity;
30+
u64 mem_write_set_len;
31+
32+
LockAccess *lock_unlock_set;
33+
u64 lock_unlock_set_capacity;
34+
u64 lock_unlock_set_len;
35+
36+
LockAccess *lock_lock_set;
37+
u64 lock_lock_set_capacity;
38+
u64 lock_lock_set_len;
39+
} ThreadState;
40+
41+
ThreadState threads[MAX_THREADS] = {};
42+
u64 n_threads = 0;
43+
44+
/*
 * Looks up the slot of a registered thread.
 *
 * tid: thread id to search for.
 * Returns the index into threads[] of the entry whose thread_id equals tid,
 * or -1 when no registered thread has that id.
 */
i64 findThreadByTId(u64 tid) {
    u64 i;
    /* Only slots [0, n_threads) are registered. The previous "<=" bound also
     * inspected slot n_threads, which is uninitialised and lies past the end
     * of the array once the table is full. */
    for (i = 0; i < n_threads; i++) {
        if (threads[i].thread_id == tid) {
            return (i64)i;
        }
    }
    return -1;
}
53+
54+
void mem_analyse_exit() {
55+
u64 j;
56+
for (j = 0; j < n_threads; j++) {
57+
u64 i;
58+
printf("Thread id: %ld \n", threads[j].thread_id);
59+
printf("mem_write_set_len: %ld \n", threads[j].mem_write_set_len);
60+
printf("mem_read_set_len: %ld \n", threads[j].mem_read_set_len);
61+
printf("unlocks: %ld \n", threads[j].lock_unlock_set_len);
62+
printf("locks: %ld \n", threads[j].lock_lock_set_len);
63+
// for (i = 0; i < threads[j].mem_write_set_len; i++) {
64+
// printf("[%d]tid write access to address: %ld, size: %ld, opcode: %s \n", threads[j].mem_write_set[i].thread_id, threads[j].mem_write_set[i].address_accessed, threads[j].mem_write_set[i].size, (threads[j].mem_read_set[i].opcode > REF_TYPE_WRITE) ? decode_opcode_name(threads[j].mem_read_set[i].opcode) /* opcode for instr */ : (threads[j].mem_read_set[i].opcode == REF_TYPE_WRITE ? "w" : "r"));
65+
// }
66+
// for (i = 0; i < threads[j].mem_read_set_len; i++) {
67+
// printf("[%d]tid read access to address: %ld, size: %ld, opcode: %s \n", threads[j].mem_write_set[i].thread_id, threads[j].mem_read_set[i].address_accessed, threads[j].mem_read_set[i].size, (threads[j].mem_read_set[i].opcode > REF_TYPE_WRITE) ? decode_opcode_name(threads[j].mem_read_set[i].opcode) /* opcode for instr */ : (threads[j].mem_read_set[i].opcode == REF_TYPE_WRITE ? "w" : "r"));
68+
// }
69+
70+
free(threads[j].mem_write_set);
71+
free(threads[j].mem_read_set);
72+
free(threads[j].lock_lock_set);
73+
free(threads[j].lock_unlock_set);
74+
}
75+
}
76+
77+
/* Global initialisation hook of the analysis; there is nothing to set up, so it always returns 1. */
u32 mem_analyse_init() {
    const u32 ok = 1;
    return ok;
}
80+
81+
/*
 * Registers a new thread and allocates its four record sets, each with room
 * for linear_set_size_increment entries.
 *
 * thread_id: id under which the thread is later looked up.
 * Returns 1 on success. Returns 0 when the table already holds MAX_THREADS
 * threads or when an allocation fails; in that case nothing is leaked and the
 * thread table is left untouched.
 */
u32 mem_analyse_new_thread_init(u64 thread_id) {
    MemoryAccess *reads;
    MemoryAccess *writes;
    LockAccess *locks;
    LockAccess *unlocks;
    ThreadState *slot;

    if (n_threads >= MAX_THREADS) return 0;

    /* Allocate everything first and only publish the slot once all four
     * buffers exist, so a partial failure cannot leave a half-built entry. */
    reads = (MemoryAccess*)malloc(sizeof(MemoryAccess) * linear_set_size_increment);
    writes = (MemoryAccess*)malloc(sizeof(MemoryAccess) * linear_set_size_increment);
    locks = (LockAccess*)malloc(sizeof(LockAccess) * linear_set_size_increment);
    unlocks = (LockAccess*)malloc(sizeof(LockAccess) * linear_set_size_increment);

    if (reads == NULL || writes == NULL || locks == NULL || unlocks == NULL) {
        printf("set allocation error \n");
        /* free(NULL) is a no-op, so the buffers that did succeed are released. */
        free(reads);
        free(writes);
        free(locks);
        free(unlocks);
        return 0;
    }

    slot = &threads[n_threads];
    slot->thread_id = thread_id;

    slot->mem_read_set = reads;
    slot->mem_read_set_capacity = linear_set_size_increment;
    slot->mem_read_set_len = 0;

    slot->mem_write_set = writes;
    slot->mem_write_set_capacity = linear_set_size_increment;
    slot->mem_write_set_len = 0;

    slot->lock_lock_set = locks;
    slot->lock_lock_set_capacity = linear_set_size_increment;
    slot->lock_lock_set_len = 0;

    slot->lock_unlock_set = unlocks;
    slot->lock_unlock_set_capacity = linear_set_size_increment;
    slot->lock_unlock_set_len = 0;

    n_threads += 1;
    return 1;
}
114+
115+
/* Per-thread exit hook; currently does nothing (a debug print of "thread exit" is disabled). */
void mem_analyse_thread_exit() {
    return;
}
118+
119+
/*
 * Grows a record set by linear_set_size_increment elements.
 *
 * set:          current buffer (may be moved by realloc).
 * set_capacity: in/out element count; incremented before reallocating.
 * Returns the new buffer, or NULL when realloc fails (the old buffer is then
 * still allocated and *set_capacity has already been increased).
 *
 * The capacity is an element count, so it must be scaled to bytes. The
 * function is untyped; sizeof(MemoryAccess) is used because the visible
 * callers pass MemoryAccess arrays, and since MemoryAccess is the larger of
 * the two record types the result is also big enough for a LockAccess set.
 */
void *increase_set_capacity(void *set, u64 *set_capacity) {
    *set_capacity += linear_set_size_increment;
    printf("new set_capacity: %ld \n", *set_capacity);
    return realloc(set, *set_capacity * sizeof(MemoryAccess));
}
124+
125+
/*
 * drwrap pre-call hook that records an unlock event for the calling thread.
 * Presumably registered on the mutex-unlock routine; verify at the drwrap_wrap call site.
 *
 * wrapcxt:   drwrap wrap context of the intercepted call.
 * user_data: unused.
 *
 * The first argument of the wrapped call is stored as the lock address.
 * Events of threads that are not registered, and events that no longer fit
 * into the set, are dropped.
 */
void wrap_pre_unlock(void *wrapcxt, OUT void **user_data) {
    void *addr = drwrap_get_arg(wrapcxt, 0);
    /* wrapcxt is not a drcontext; fetch the real one before asking for the id. */
    u64 thread_id = dr_get_thread_id(drwrap_get_drcontext(wrapcxt));
    i64 t_index = findThreadByTId(thread_id);
    ThreadState *curr_thread;
    LockAccess *entry;

    if (t_index < 0) {
        return;
    }
    curr_thread = &threads[t_index];
    /* The set is a fixed-size buffer here; never write past its capacity. */
    if (curr_thread->lock_unlock_set_len >= curr_thread->lock_unlock_set_capacity) {
        return;
    }
    entry = &curr_thread->lock_unlock_set[curr_thread->lock_unlock_set_len];
    entry->lock_address = (usize)addr;
    entry->callee_thread_id = (u32)thread_id;
    curr_thread->lock_unlock_set_len += 1;
}
139+
140+
/*
 * drwrap pre-call hook that records a lock event for the calling thread.
 * Presumably registered on the mutex-lock routine; verify at the drwrap_wrap call site.
 *
 * wrapcxt:   drwrap wrap context of the intercepted call.
 * user_data: unused.
 *
 * The first argument of the wrapped call is stored as the lock address.
 * Events of threads that are not registered, and events that no longer fit
 * into the set, are dropped.
 */
void wrap_pre_lock(void *wrapcxt, OUT void **user_data) {
    void *addr = drwrap_get_arg(wrapcxt, 0);
    /* Fixes the "wrong id" todo: wrapcxt is not a drcontext, so the thread id
     * has to be read through the drcontext that belongs to it. */
    u64 thread_id = dr_get_thread_id(drwrap_get_drcontext(wrapcxt));
    i64 t_index = findThreadByTId(thread_id);
    ThreadState *curr_thread;
    LockAccess *entry;

    if (t_index < 0) {
        return;
    }
    curr_thread = &threads[t_index];
    /* The set is a fixed-size buffer here; never write past its capacity. */
    if (curr_thread->lock_lock_set_len >= curr_thread->lock_lock_set_capacity) {
        return;
    }
    entry = &curr_thread->lock_lock_set[curr_thread->lock_lock_set_len];
    entry->lock_address = (usize)addr;
    entry->callee_thread_id = (u32)thread_id;
    curr_thread->lock_lock_set_len += 1;
}
154+
155+
156+
/*
 * drwrap post-call hook that records the return value of the wrapped call
 * (presumably malloc, going by the name) for the calling thread.
 *
 * wrapcxt:   drwrap wrap context of the intercepted call.
 * user_data: unused.
 *
 * NOTE(review): the value is appended to lock_lock_set, exactly as before;
 * that mixes allocation addresses into the lock records and looks like a
 * placeholder until a dedicated allocation set exists. Confirm intent.
 * NOTE(review): drwrap documents post-call callbacks as taking
 * "void *user_data"; the parameter type here is kept because the header
 * declares it this way. Confirm against the drwrap_wrap call site.
 */
void wrap_post_malloc(void *wrapcxt, OUT void **user_data) {
    void *addr;
    u64 thread_id;
    i64 t_index;
    ThreadState *curr_thread;
    LockAccess *entry;

    /* Presumably drwrap can invoke post-call hooks with a NULL context after
     * abnormal unwinding; nothing can be read in that case. TODO confirm. */
    if (wrapcxt == NULL) {
        return;
    }
    addr = drwrap_get_retval(wrapcxt);
    /* wrapcxt is not a drcontext; fetch the real one before asking for the id. */
    thread_id = dr_get_thread_id(drwrap_get_drcontext(wrapcxt));
    t_index = findThreadByTId(thread_id);
    if (t_index < 0) {
        /* The return was commented out, which made the code index threads[-1]. */
        return;
    }
    curr_thread = &threads[t_index];
    /* The set is a fixed-size buffer here; never write past its capacity. */
    if (curr_thread->lock_lock_set_len >= curr_thread->lock_lock_set_capacity) {
        return;
    }
    entry = &curr_thread->lock_lock_set[curr_thread->lock_lock_set_len];
    entry->lock_address = (usize)addr;
    entry->callee_thread_id = (u32)thread_id;
    curr_thread->lock_lock_set_len += 1;
}
171+
172+
// This is an event-like function that is invoked on every memory access (called by DynamoRIO)
173+
void memtrace(void *drcontext, u64 thread_id) {
174+
per_thread_t *data;
175+
mem_ref_t *mem_ref, *buf_ptr;
176+
data = drmgr_get_tls_field(drcontext, tls_idx);
177+
buf_ptr = BUF_PTR(data->seg_base);
178+
179+
i64 t_index = findThreadByTId(thread_id);
180+
if (t_index < 0) {
181+
// printf("error finding thread_id. %ld \n", thread_id);
182+
return;
183+
}
184+
ThreadState *curr_thread = &threads[t_index];
185+
for (mem_ref = (mem_ref_t *)data->buf_base; mem_ref < buf_ptr; mem_ref++) {
186+
if (mem_ref->type == 1 || mem_ref->type == 457 || mem_ref->type == 458 || mem_ref->type == 456 || mem_ref->type == 568) {
187+
// mem write
188+
if (curr_thread->mem_write_set_len >= curr_thread->mem_write_set_capacity) curr_thread->mem_write_set = increase_set_capacity(curr_thread->mem_write_set, &curr_thread->mem_write_set_capacity);
189+
if (curr_thread->mem_write_set == NULL) exit(1);
190+
curr_thread->mem_write_set[curr_thread->mem_write_set_len].address_accessed = (usize)mem_ref->addr;
191+
curr_thread->mem_write_set[curr_thread->mem_write_set_len].opcode = mem_ref->type;
192+
curr_thread->mem_write_set[curr_thread->mem_write_set_len].callee_thread_id = thread_id;
193+
curr_thread->mem_write_set[curr_thread->mem_write_set_len].size = mem_ref->size;
194+
curr_thread->mem_write_set_len += 1;
195+
} else if(mem_ref->type == 0 || mem_ref->type == 227 || mem_ref->type == 225 || mem_ref->type == 197 || mem_ref->type == 228 || mem_ref->type == 229 || mem_ref->type == 299 || mem_ref->type == 173) {
196+
// mem read
197+
if (curr_thread->mem_read_set_len >= curr_thread->mem_read_set_capacity) curr_thread->mem_read_set = increase_set_capacity(curr_thread->mem_read_set, &curr_thread->mem_read_set_capacity);
198+
if (curr_thread->mem_read_set == NULL) exit(1);
199+
curr_thread->mem_read_set[curr_thread->mem_read_set_len].address_accessed = (usize)mem_ref->addr;
200+
curr_thread->mem_read_set[curr_thread->mem_read_set_len].opcode = mem_ref->type;
201+
curr_thread->mem_read_set[curr_thread->mem_read_set_len].callee_thread_id = thread_id;
202+
curr_thread->mem_read_set[curr_thread->mem_read_set_len].size = mem_ref->size;
203+
curr_thread->mem_read_set_len += 1;
204+
}
205+
// else {
206+
// printf("missed %d \n", mem_ref->type);
207+
// }
208+
// // /* We use PIFX to avoid leading zeroes and shrink the resulting file. */
209+
// fprintf(data->logf, "" PIFX ": %2d, %s (%d)\n", (ptr_uint_t)mem_ref->addr, mem_ref->size, (mem_ref->type > REF_TYPE_WRITE) ? decode_opcode_name(mem_ref->type) /* opcode for instr */ : (mem_ref->type == REF_TYPE_WRITE ? "w" : "r"), mem_ref->type);
210+
data->num_refs++;
211+
}
212+
BUF_PTR(data->seg_base) = data->buf_base;
213+
}

include/memtrace.h

+72
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#include <stdio.h>
2+
#include <stddef.h> /* for offsetof */
3+
#include <string.h>
4+
#include <sys/syscall.h>
5+
#include "dr_api.h"
6+
#include "drmgr.h"
7+
#include "drreg.h"
8+
#include "drutil.h"
9+
#include "drx.h"
10+
#include "drwrap.h"
11+
#include "drcallstack.h"
12+
#include "drsyms.h"
13+
14+
#include "types.h"
15+
16+
#define SYS_MAX_ARGS 3
17+
#define TLS_SLOT(tls_base, enum_val) (void **)((byte *)(tls_base) + tls_offs + (enum_val))
18+
#define BUF_PTR(tls_base) *(mem_ref_t **)TLS_SLOT(tls_base, MEMTRACE_TLS_OFFS_BUF_PTR)
19+
20+
enum {
21+
REF_TYPE_READ = 0,
22+
REF_TYPE_WRITE = 1,
23+
};
24+
/* Each mem_ref_t is a <type, size, addr> entry representing a memory reference
25+
* instruction or the reference information, e.g.:
26+
* - mem ref instr: { type = 42 (call), size = 5, addr = 0x7f59c2d002d3 }
27+
* - mem ref info: { type = 1 (write), size = 8, addr = 0x7ffeacab0ec8 }
28+
*/
29+
typedef struct _mem_ref_t {
30+
ushort type; /* r(0), w(1), or opcode (assuming 0/1 are invalid opcode) */
31+
ushort size; /* mem ref size or instr length */
32+
app_pc addr; /* mem ref addr or instr pc */
33+
} mem_ref_t;
34+
35+
/* Max number of mem_ref a buffer can have. It should be big enough
36+
* to hold all entries between clean calls.
37+
*/
38+
#define MAX_NUM_MEM_REFS 4096
39+
/* The maximum size of buffer for holding mem_refs. */
40+
#define MEM_BUF_SIZE (sizeof(mem_ref_t) * MAX_NUM_MEM_REFS)
41+
42+
/* thread private log file and counter */
43+
typedef struct {
44+
byte *seg_base;
45+
mem_ref_t *buf_base;
46+
file_t log;
47+
FILE *logf;
48+
uint64 num_refs;
49+
50+
reg_t param[SYS_MAX_ARGS];
51+
bool repeat;
52+
} per_thread_t;
53+
54+
/* Allocated TLS slot offsets */
55+
enum {
56+
MEMTRACE_TLS_OFFS_BUF_PTR,
57+
MEMTRACE_TLS_COUNT, /* total number of TLS slots allocated */
58+
};
59+
60+
/* Shared state of the client: TLS field index, TLS slot offset (used by TLS_SLOT) and a syscall counter.
 * NOTE(review): these are definitions, not declarations. If more than one .c file includes this header,
 * each one defines the same globals, which fails to link on toolchains that default to -fno-common.
 * Consider "extern" here plus a single definition in one .c file. */
int tls_idx;
61+
uint tls_offs;
62+
63+
int num_syscalls;
64+
65+
extern void memtrace(void *drcontext, u64 thread_id);
66+
extern u32 mem_analyse_init();
67+
extern void mem_analyse_exit();
68+
extern void mem_analyse_thread_exit();
69+
extern u32 mem_analyse_new_thread_init(u64 thread_id);
70+
extern void wrap_pre_unlock(void *wrapcxt, OUT void **user_data);
71+
extern void wrap_pre_lock(void *wrapcxt, OUT void **user_data);
72+
extern void wrap_post_malloc(void *wrapcxt, OUT void **user_data);

include/types.h

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
/* Short fixed-width integer aliases used throughout the client. */
#ifndef TYPES_H
#define TYPES_H

#include <stdint.h>

typedef uint8_t u8;
typedef int8_t i8;

typedef uint16_t u16;
typedef int16_t i16;

typedef uint32_t u32;
typedef int32_t i32;

typedef uint64_t u64;
typedef int64_t i64;

/* Pointer-sized signed / unsigned integers. */
typedef intptr_t isize;
typedef uintptr_t usize;

#endif /* TYPES_H */

0 commit comments

Comments
 (0)