Skip to content

Commit e3b31e8

Browse files
committed
Fix race condition in HttpProtocol while having multiple proxy settings
In HttpProtocol implementation, the client builder was singleton and may be accessed and modified by different threads at same time. The result is that a wrong proxy will be used or a wrong proxy auth will be configured. To fix it, create a local builder insteand of having a field-level builder. Fixes #1247
1 parent ef0899e commit e3b31e8

File tree

2 files changed

+118
-147
lines changed

2 files changed

+118
-147
lines changed

core/src/main/java/org/apache/stormcrawler/protocol/httpclient/HttpProtocol.java

+46-68
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,7 @@
2222
import java.nio.charset.StandardCharsets;
2323
import java.util.ArrayList;
2424
import java.util.Base64;
25-
import java.util.Collection;
26-
import java.util.LinkedList;
25+
import java.util.Collections;
2726
import java.util.List;
2827
import java.util.Locale;
2928
import org.apache.commons.lang.StringUtils;
@@ -78,10 +77,11 @@ public class HttpProtocol extends AbstractHttpProtocol
7877

7978
private int globalMaxContent;
8079

81-
private HttpClientBuilder builder;
82-
8380
private RequestConfig requestConfig;
84-
private RequestConfig.Builder requestConfigBuilder;
81+
82+
private String userAgent;
83+
84+
private final List<BasicHeader> defaultHeaders = new ArrayList<>();
8585

8686
@Override
8787
public void configure(final Config conf) {
@@ -100,19 +100,14 @@ public void configure(final Config conf) {
100100

101101
globalMaxContent = ConfUtils.getInt(conf, "http.content.limit", -1);
102102

103-
String userAgent = getAgentString(conf);
104-
105-
Collection<BasicHeader> defaultHeaders = new LinkedList<>();
103+
userAgent = getAgentString(conf);
106104

107105
String accept = ConfUtils.getString(conf, "http.accept");
108106
if (StringUtils.isNotBlank(accept)) {
109107
defaultHeaders.add(new BasicHeader("Accept", accept));
110108
}
111109

112-
customHeaders.forEach(
113-
h -> {
114-
defaultHeaders.add(new BasicHeader(h.getKey(), h.getValue()));
115-
});
110+
customHeaders.forEach(h -> defaultHeaders.add(new BasicHeader(h.getKey(), h.getValue())));
116111

117112
String basicAuthUser = ConfUtils.getString(conf, "http.basicauth.user", null);
118113

@@ -132,71 +127,28 @@ public void configure(final Config conf) {
132127
defaultHeaders.add(new BasicHeader("Accept-Language", acceptLanguage));
133128
}
134129

135-
builder =
136-
HttpClients.custom()
137-
.setUserAgent(userAgent)
138-
.setDefaultHeaders(defaultHeaders)
139-
.setConnectionManager(CONNECTION_MANAGER)
140-
.setConnectionManagerShared(true)
141-
.disableRedirectHandling()
142-
.disableAutomaticRetries();
143-
144130
int timeout = ConfUtils.getInt(conf, "http.timeout", 10000);
145131

146-
requestConfigBuilder =
147-
RequestConfig.custom()
148-
.setSocketTimeout(timeout)
149-
.setConnectTimeout(timeout)
150-
.setConnectionRequestTimeout(timeout)
151-
.setCookieSpec(CookieSpecs.STANDARD);
152-
153-
requestConfig = requestConfigBuilder.build();
132+
requestConfig = RequestConfig.custom()
133+
.setSocketTimeout(timeout)
134+
.setConnectTimeout(timeout)
135+
.setConnectionRequestTimeout(timeout)
136+
.setCookieSpec(CookieSpecs.STANDARD)
137+
// Can make configurable and add more in future
138+
.setProxyPreferredAuthSchemes(Collections.singletonList(AuthSchemes.BASIC))
139+
.build();
154140
}
155141

156142
@Override
157143
public ProtocolResponse getProtocolOutput(String url, Metadata md) throws Exception {
158144

159145
LOG.debug("HTTP connection manager stats {}", CONNECTION_MANAGER.getTotalStats());
160146

161-
// set default request config to global config
162-
RequestConfig reqConfig = requestConfig;
163-
147+
SCProxy proxy = null;
164148
// conditionally add a dynamic proxy
165149
if (proxyManager != null) {
166-
// retrieve proxy from proxy manager
167-
SCProxy prox = proxyManager.getProxy(md);
168-
169-
// conditionally configure proxy authentication
170-
if (StringUtils.isNotBlank(prox.getUsername())) {
171-
List<String> authSchemes = new ArrayList<>();
172-
173-
// Can make configurable and add more in future
174-
authSchemes.add(AuthSchemes.BASIC);
175-
requestConfigBuilder.setProxyPreferredAuthSchemes(authSchemes);
176-
177-
BasicCredentialsProvider basicAuthCreds = new BasicCredentialsProvider();
178-
basicAuthCreds.setCredentials(
179-
new AuthScope(prox.getAddress(), Integer.parseInt(prox.getPort())),
180-
new UsernamePasswordCredentials(prox.getUsername(), prox.getPassword()));
181-
builder.setDefaultCredentialsProvider(basicAuthCreds);
182-
}
183-
184-
HttpHost proxy = new HttpHost(prox.getAddress(), Integer.parseInt(prox.getPort()));
185-
DefaultProxyRoutePlanner routePlanner = new DefaultProxyRoutePlanner(proxy);
186-
builder.setRoutePlanner(routePlanner);
187-
188-
// save start time for debugging speed impact of request config
189-
// build
190-
long buildStart = System.currentTimeMillis();
191-
192-
// set request config to new configuration with dynamic proxy
193-
reqConfig = requestConfigBuilder.build();
194-
195-
LOG.debug(
196-
"time to build http request config with proxy: {}ms",
197-
System.currentTimeMillis() - buildStart);
198-
199-
LOG.debug("fetching with " + prox.toString());
150+
proxy = proxyManager.getProxy(md);
151+
LOG.debug("fetching with {}", proxy);
200152
}
201153

202154
HttpRequestBase request = new HttpGet(url);
@@ -246,11 +198,11 @@ public ProtocolResponse getProtocolOutput(String url, Metadata md) throws Except
246198
}
247199
}
248200

249-
request.setConfig(reqConfig);
201+
request.setConfig(requestConfig);
250202

251203
// no need to release the connection explicitly as this is handled
252204
// automatically. The client itself must be closed though.
253-
try (CloseableHttpClient httpclient = builder.build()) {
205+
try (CloseableHttpClient httpclient = createClient(proxy)) {
254206
return httpclient.execute(request, responseHandler);
255207
}
256208
}
@@ -372,6 +324,32 @@ private static byte[] toByteArray(
372324
return buffer.toByteArray();
373325
}
374326

327+
private CloseableHttpClient createClient(final SCProxy proxy) {
328+
final HttpClientBuilder builder = HttpClients.custom()
329+
.setUserAgent(userAgent)
330+
.setDefaultHeaders(defaultHeaders)
331+
.setConnectionManager(CONNECTION_MANAGER)
332+
.setConnectionManagerShared(true)
333+
.disableRedirectHandling()
334+
.disableAutomaticRetries();
335+
336+
if (proxy != null) {
337+
final DefaultProxyRoutePlanner routePlanner = new DefaultProxyRoutePlanner(new HttpHost(
338+
proxy.getAddress(), Integer.parseInt(proxy.getPort())));
339+
builder.setRoutePlanner(routePlanner);
340+
if (StringUtils.isNotBlank(proxy.getUsername())) {
341+
// conditionally configure proxy authentication
342+
final BasicCredentialsProvider credentialsProvider = new BasicCredentialsProvider();
343+
credentialsProvider.setCredentials(
344+
new AuthScope(proxy.getAddress(), Integer.parseInt(proxy.getPort())),
345+
new UsernamePasswordCredentials(proxy.getUsername(), proxy.getPassword()));
346+
builder.setDefaultCredentialsProvider(credentialsProvider);
347+
}
348+
}
349+
350+
return builder.build();
351+
}
352+
375353
public static void main(String[] args) throws Exception {
376354
Protocol.main(new HttpProtocol(), args);
377355
}

0 commit comments

Comments
 (0)