Skip to content

Commit d9042da

Browse files
committed
add best practice guide
1 parent a55e6fc commit d9042da

File tree

2 files changed

+653
-551
lines changed

2 files changed

+653
-551
lines changed

docs/best_practice.md

+362
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,362 @@
1+
# Best Practices Guide
2+
3+
## Core Design Principles
4+
5+
### 1. Single Responsibility
6+
7+
- Each agent should handle one primary function
8+
- Break complex behaviors into specialized agents
9+
- Keep message handlers focused and specific
10+
11+
```python
12+
# Good
13+
class DataValidator(Worker):
    """Agent whose single responsibility is validating data."""

    async def validate(self, data):
        """Validate ``data``; placeholder body that returns ``None``."""
        return None
15+
16+
17+
class DataProcessor(Worker):
    """Agent whose single responsibility is processing data."""

    async def process(self, data):
        """Process ``data``; placeholder body that returns ``None``."""
        return None
19+
20+
21+
# Avoid
22+
class DataHandler(Worker):
    """Counter-example: one agent that mixes validation and processing."""

    async def validate_and_process(self, data):
        """Do both jobs in one handler; placeholder body that returns ``None``."""
        return None
24+
```
25+
26+
### 2. Message Immutability
27+
28+
- Define messages using dataclasses
29+
- Never modify received messages
30+
- Create new instances for changes
31+
32+
```python
33+
@dataclass(frozen=True)  # frozen=True makes attribute assignment raise
class TaskMessage:
    """Immutable task message.

    Build a new instance instead of changing an existing one.
    """

    id: str
    data: Any
    # Defaults to the wall-clock time at construction when not supplied.
    timestamp: float = field(default_factory=time.time)
38+
```
39+
40+
### 3. Event-Driven Architecture
41+
42+
- Use decorators for message handling
43+
- Implement asynchronous communication
44+
- Handle events independently
45+
46+
```python
47+
class EventDrivenAgent(Worker):
    """Agent whose behavior is wired up through handler decorators."""

    @on(TaskMessage)
    async def handle_task(self, msg: TaskMessage):
        """Pass a received ``TaskMessage`` on to ``process_task``."""
        await self.process_task(msg)

    @on_connect("*")
    async def handle_connection(self, topic: str, agent: AgentDetail):
        """Run ``initialize_connection`` for the connecting agent.

        ``topic`` is accepted but not used here.
        """
        await self.initialize_connection(agent)
55+
```
56+
57+
## Architecture Patterns
58+
59+
### 1. Layered Communication
60+
61+
```python
62+
class SystemArchitecture:
    """Groups the system's agents into named layers."""

    def __init__(self):
        """Create one admin agent, three worker agents and one storage agent."""
        coordinator = AdminAgent()
        workers = []
        for _ in range(3):
            workers.append(WorkerAgent())
        storage = StorageAgent()

        self.layers = {
            'coordination': coordinator,
            'processing': workers,
            'storage': storage,
        }
69+
```
70+
71+
### 2. State Management
72+
73+
```python
74+
class StatefulAgent(Worker):
    """Agent driven by a small explicit state machine."""

    def __init__(self):
        """Start in ``IDLE`` and declare the allowed transitions."""
        self.state = AgentState.IDLE
        # Maps each state to the states reachable from it. COMPLETED and
        # ERROR have no entry, so they are terminal.
        self._transitions = {
            AgentState.IDLE: [AgentState.PROCESSING],
            AgentState.PROCESSING: [AgentState.COMPLETED, AgentState.ERROR],
        }

    async def transition(self, new_state: AgentState):
        """Move to ``new_state`` if the transition table allows it.

        Returns:
            ``True`` when the state changed, ``False`` when the transition
            was rejected and the state was left untouched.
        """
        # .get() rather than indexing: a terminal state has no entry, and
        # indexing would raise KeyError instead of rejecting the move.
        allowed = self._transitions.get(self.state, ())
        if new_state not in allowed:
            return False
        self.state = new_state
        return True
85+
```
86+
87+
### 3. Resource Management
88+
89+
```python
90+
class ResourceAwareAgent(Worker):
    """Agent usable with ``async with`` so setup and teardown stay paired."""

    async def __aenter__(self):
        """Initialize resources, then return the agent itself."""
        await self.initialize_resources()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Clean up resources on exit.

        Returns ``None``, so an exception raised inside the ``async with``
        body is not suppressed.
        """
        await self.cleanup_resources()
97+
```
98+
99+
## Error Handling and Resilience
100+
101+
### 1. Graceful Error Recovery
102+
103+
```python
104+
class ResilientAgent(Worker):
    """Agent that retries failed work with exponential backoff."""

    async def execute_with_retry(self, task, max_retries=3):
        """Run ``self.process(task)``, retrying on failure.

        Args:
            task: Work item passed to ``process`` and, on final failure, to
                ``handle_failure``.
            max_retries: Maximum number of attempts. With a value of 0 or
                less no attempt is made and ``None`` is returned.

        Returns:
            The result of ``process`` on success, otherwise ``None`` once
            ``handle_failure`` has been awaited for the last error.
        """
        last_attempt = max_retries - 1
        for attempt in range(max_retries):
            try:
                return await self.process(task)
            except Exception as e:
                if attempt == last_attempt:
                    await self.handle_failure(task, e)
                    # Stop here: there is no further attempt, so backing
                    # off again would only delay the caller.
                    return None
                # Exponential backoff between attempts: 1s, 2s, 4s, ...
                await asyncio.sleep(2 ** attempt)
        return None
113+
```
114+
115+
### 2. Circuit Breaking
116+
117+
```python
118+
class CircuitBreaker:
    """Stops calling a failing dependency until a cool-down has elapsed.

    The breaker is ``'closed'`` (calls pass through) until
    ``failure_threshold`` consecutive calls fail. It then becomes ``'open'``
    and rejects calls until ``reset_timeout`` seconds have passed.
    """

    def __init__(self, failure_threshold=5, reset_timeout=60):
        """
        Args:
            failure_threshold: Consecutive failures that open the breaker.
            reset_timeout: Seconds to stay open before closing again.
        """
        self.failures = 0
        self.threshold = failure_threshold
        self.reset_timeout = reset_timeout
        self.state = 'closed'
        # Strong reference to the pending reset task: the event loop only
        # keeps weak references, so an unreferenced task may be collected.
        self._reset_task = None

    async def call(self, func, *args, **kwargs):
        """Await ``func(*args, **kwargs)`` through the breaker.

        Raises:
            CircuitBreakerOpen: If the breaker is open. This exception is
                not defined in this snippet; it is assumed to come from the
                surrounding project.
            Exception: Whatever ``func`` raises is re-raised unchanged.
        """
        if self.state == 'open':
            raise CircuitBreakerOpen()

        try:
            result = await func(*args, **kwargs)
        except Exception:
            self.failures += 1
            # Calls already in flight when the breaker opened may fail
            # afterwards; the state check prevents a second timer.
            if self.failures >= self.threshold and self.state != 'open':
                self.state = 'open'
                self._reset_task = asyncio.create_task(self.reset_timer())
            raise

        # Any success resets the consecutive-failure count.
        self.failures = 0
        return result

    async def reset_timer(self):
        """Close the breaker again after ``reset_timeout`` seconds."""
        await asyncio.sleep(self.reset_timeout)
        self.failures = 0
        self.state = 'closed'
        self._reset_task = None
139+
```
140+
141+
## Performance Optimization
142+
143+
### 1. Message Batching
144+
145+
```python
146+
class BatchProcessor(Worker):
    """Collects items and hands them to ``process_batch`` in groups."""

    def __init__(self, batch_size=100):
        """
        Args:
            batch_size: Number of buffered items that triggers a flush.
        """
        self.batch = []
        self.batch_size = batch_size

    async def process(self, item):
        """Buffer ``item`` and flush once ``batch_size`` items are pending."""
        self.batch.append(item)
        if len(self.batch) >= self.batch_size:
            await self.flush()

    async def flush(self):
        """Send all buffered items to ``process_batch``.

        Call this on shutdown so a partially filled batch is not lost.
        Does nothing when the buffer is empty.
        """
        if not self.batch:
            return
        # Detach the batch before awaiting: items appended by other
        # coroutines while process_batch runs must land in a fresh list,
        # not in one that is reset after the await.
        ready, self.batch = self.batch, []
        try:
            await self.process_batch(ready)
        except Exception:
            # Keep the unprocessed items, ahead of any newer arrivals.
            self.batch = ready + self.batch
            raise
156+
```
157+
158+
### 2. Resource Pooling
159+
160+
```python
161+
class ResourcePool:
    """Fixed-size pool of reusable resources backed by an ``asyncio.Queue``."""

    def __init__(self, pool_size, factory=None):
        """
        Args:
            pool_size: Maximum number of resources held by the pool.
            factory: Optional zero-argument callable. When given, it is
                called ``pool_size`` times to pre-fill the pool. When
                omitted, the pool starts empty and must be filled through
                ``release`` before ``acquire`` can return.
        """
        self.pool = asyncio.Queue(pool_size)
        self.semaphore = asyncio.Semaphore(pool_size)
        if factory is not None:
            for _ in range(pool_size):
                self.pool.put_nowait(factory())

    async def acquire(self):
        """Wait for a free resource and return it.

        The semaphore is held only while waiting on the queue, so it caps
        the number of concurrent waiters. The queue itself is what blocks
        callers while the pool is empty.
        """
        async with self.semaphore:
            return await self.pool.get()

    async def release(self, resource):
        """Return ``resource`` to the pool, waiting if the pool is full."""
        await self.pool.put(resource)
172+
```
173+
174+
## Security Best Practices
175+
176+
### 1. Message Authentication
177+
178+
```python
179+
import hashlib
import hmac


class SecureAgent(Worker):
    """Agent that checks an HMAC signature before trusting a message."""

    def authenticate_message(self, message, signature):
        """Return ``True`` if ``signature`` matches ``message.content``.

        NOTE(review): assumes ``self.secret_key`` and ``message.content``
        are ``bytes``, that ``signature`` is the raw digest (not hex), and
        that senders sign with HMAC-SHA256. Confirm against the signing side.

        Args:
            message: Object whose ``content`` attribute holds the signed bytes.
            signature: Digest received with the message.
        """
        expected = hmac.new(
            self.secret_key,
            message.content,
            hashlib.sha256,
        ).digest()
        # Constant-time comparison, so timing does not leak how many bytes
        # of the signature matched.
        return hmac.compare_digest(expected, signature)
186+
```
187+
188+
### 2. Access Control
189+
190+
```python
191+
class SecureWorker(Worker):
    """Worker that drops messages from peers that fail authorization."""

    async def on_message(self, agent: AgentDetail, data: bytes, time: int):
        """Process ``data`` only when ``authorize_peer`` accepts the sender.

        Unauthorized senders are logged at warning level and ignored.
        ``time`` is accepted but not used here.
        """
        if self.authorize_peer(agent.id):
            await self.process_message(data)
            return
        logger.warning(f"Unauthorized message from {agent.id}")
197+
```
198+
199+
## Monitoring and Observability
200+
201+
### 1. Structured Logging
202+
203+
```python
204+
class ObservableAgent(Worker):
    """Agent that emits log events carrying structured fields."""

    async def log_event(self, event_type, **kwargs):
        """Log ``event_type`` at info level with identifying fields.

        The agent id, the current time and any extra ``kwargs`` are passed
        to the logger as keyword arguments.

        NOTE(review): the standard-library ``logging.Logger.info`` rejects
        arbitrary keyword arguments, so ``logger`` is presumably a
        structured logger. Confirm which one the project uses.
        """
        message = f"{event_type}"
        logger.info(message, agent_id=self.id, timestamp=time.time(), **kwargs)
212+
```
213+
214+
### 2. Metrics Collection
215+
216+
```python
217+
class MetricsCollector:
    """Holds the named metric instruments for an agent."""

    def __init__(self):
        """Create one counter, one histogram and one gauge."""
        processed = Counter()
        duration = Histogram()
        errors = Gauge()
        self.metrics = {
            'messages_processed': processed,
            'processing_time': duration,
            'error_rate': errors,
        }

    async def record(self, metric, value):
        """Record ``value`` on the instrument registered under ``metric``.

        Raises:
            KeyError: If ``metric`` is not a registered name.
        """
        instrument = self.metrics[metric]
        instrument.record(value)
227+
```
228+
229+
## Implementation Guidelines
230+
231+
### 1. Message Design
232+
233+
- Include metadata for tracking
234+
- Add validation methods
235+
- Use clear naming conventions
236+
237+
```python
238+
@dataclass
class Message:
    """Message envelope carrying a payload plus tracking metadata."""

    # Required fields must come before defaulted ones; a dataclass with a
    # non-default field after a defaulted one fails at class creation.
    payload: Any
    # str(...) so the value matches the declared type; uuid.uuid4() alone
    # returns a uuid.UUID object.
    id: str = field(default_factory=lambda: str(uuid.uuid4()))
    timestamp: float = field(default_factory=time.time)
    metadata: Dict = field(default_factory=dict)

    def validate(self) -> bool:
        """Return ``True`` when the payload is truthy."""
        return bool(self.payload)
247+
```
248+
249+
### 2. Communication Patterns
250+
251+
- Use broadcast for system-wide messages
252+
- Use direct messages for point-to-point communication
253+
- Use topic-based messaging for selective communication
254+
255+
```python
256+
class CommunicationPatterns:
    """Thin wrappers naming the three ways of sending a message.

    Relies on ``broadcast_message``, ``send_message`` and ``publish`` being
    provided on ``self``; they are not defined in this class.
    """

    async def broadcast_update(self, update):
        """Send ``update`` through ``broadcast_message``."""
        send_to_all = self.broadcast_message
        await send_to_all(update)

    async def direct_message(self, peer_id, message):
        """Send ``message`` to ``peer_id`` through ``send_message``."""
        send_to_peer = self.send_message
        await send_to_peer(peer_id, message)

    async def topic_message(self, topic, message):
        """Send ``message`` on ``topic`` through ``publish``."""
        send_to_topic = self.publish
        await send_to_topic(topic, message)
265+
```
266+
267+
### 3. State Transitions
268+
269+
- Define clear state machines
270+
- Validate transitions
271+
- Log state changes
272+
273+
```python
274+
class WorkflowAgent(Worker):
    """Agent that validates and logs every state change."""

    async def transition_state(self, new_state):
        """Switch to ``new_state`` and log the change.

        Raises:
            InvalidTransition: If ``new_state`` is not listed in
                ``valid_transitions`` for the current state.
        """
        allowed = self.valid_transitions[self.current_state]
        if new_state in allowed:
            self.current_state = new_state
            await self.log_event("state_change", new_state=new_state)
            return
        raise InvalidTransition(f"{self.current_state} -> {new_state}")
281+
```
282+
283+
## Common Pitfalls
284+
285+
1. Race Conditions
286+
287+
- Use synchronization primitives
288+
- Implement proper locking
289+
- Handle concurrent access
290+
291+
2. Memory Leaks
292+
293+
- Clean up resources properly
294+
- Implement context managers
295+
- Monitor memory usage
296+
297+
3. Message Overflow
298+
299+
- Implement backpressure
300+
- Use flow control
301+
- Handle queue limits
302+
303+
4. Error Propagation
304+
305+
- Define error boundaries
306+
- Implement recovery strategies
307+
- Log error contexts
308+
309+
## Best Practices Checklist
310+
311+
### Design
312+
313+
- [ ] Single responsibility per agent
314+
- [ ] Clear message contracts
315+
- [ ] Proper state management
316+
- [ ] Error handling strategy
317+
318+
### Implementation
319+
320+
- [ ] Immutable messages
321+
- [ ] Resource cleanup
322+
- [ ] Proper logging
323+
- [ ] Security measures
324+
325+
### Operation
326+
327+
- [ ] Monitoring setup
328+
- [ ] Performance metrics
329+
- [ ] Error tracking
330+
- [ ] Resource monitoring
331+
332+
## Deployment Considerations
333+
334+
### Configuration
335+
336+
```python
337+
class ConfigurableAgent(Worker):
    """Agent that loads and validates its configuration on construction."""

    def __init__(self, config_path: str):
        """Load the config at ``config_path``, then run ``validate_config``."""
        loaded = self.load_config(config_path)
        self.config = loaded
        self.validate_config()
341+
```
342+
343+
### Resource Limits
344+
345+
```python
346+
class ResourceLimits:
    """Resource limits read from environment variables."""

    def __init__(self):
        """Read each limit from the environment, falling back to a default.

        ``MAX_CONNECTIONS`` defaults to 100 and ``MESSAGE_TIMEOUT`` to 30.
        A value that ``int()`` cannot parse raises ``ValueError``.
        """
        env = os.environ
        self.max_connections = int(env.get('MAX_CONNECTIONS', 100))
        self.message_timeout = int(env.get('MESSAGE_TIMEOUT', 30))
350+
```
351+
352+
### Health Checks
353+
354+
```python
355+
class HealthCheck(Worker):
    """Agent that reports a snapshot of its own health."""

    async def check_health(self):
        """Return a dict with status, connection count and message rate.

        ``status`` is always the string ``'healthy'``.
        """
        report = {'status': 'healthy'}
        report['connections'] = len(self.connections)
        report['message_rate'] = self.message_counter.rate()
        return report
362+
```

0 commit comments

Comments
 (0)