Skip to content

Commit

Permalink
Merge branch 'release/1.0.2'
Browse files Browse the repository at this point in the history
  • Loading branch information
sutra committed Nov 26, 2024
2 parents e98f7d7 + bf1088b commit 837253c
Show file tree
Hide file tree
Showing 11 changed files with 50 additions and 13 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ WebMagic use slf4j with slf4j-log4j12 implementation. If you customized your slf

### First crawler:

Write a class implements PageProcessor. For example, I wrote a crawler of github repository infomation.
Write a class implements PageProcessor. For example, I wrote a crawler of github repository information.

```java
public class GithubRepoPageProcessor implements PageProcessor {
Expand Down Expand Up @@ -112,7 +112,7 @@ public class GithubRepo {

Documents: [http://webmagic.io/docs/](http://webmagic.io/docs/)

The architecture of webmagic (refered to [Scrapy](http://scrapy.org/))
The architecture of webmagic (referred to [Scrapy](http://scrapy.org/))

![image](http://code4craft.github.io/images/posts/webmagic.png)

Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
<version>2.2.1</version>
</parent>
<groupId>us.codecraft</groupId>
<version>1.0.1</version>
<version>1.0.2</version>
<packaging>pom</packaging>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
Expand Down
2 changes: 1 addition & 1 deletion webmagic-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
<parent>
<groupId>us.codecraft</groupId>
<artifactId>webmagic</artifactId>
<version>1.0.1</version>
<version>1.0.2</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
37 changes: 37 additions & 0 deletions webmagic-core/src/main/java/us/codecraft/webmagic/Page.java
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,44 @@ public class Page {

private String charset;

/**
 * Creates a {@link Page} for the given request with
 * {@link #downloadSuccess} set to {@code true}.
 *
 * @param request the request to associate with the page.
 * @return a new page bound to {@code request} and flagged as successfully downloaded.
 * @since 1.0.2
 */
public static Page ofSuccess(Request request) {
    final boolean succeeded = true;
    return new Page(request, succeeded);
}

/**
 * Returns a {@link Page} whose {@link #downloadSuccess} is {@code false}
 * and whose {@link #request} is the given request.
 *
 * @param request the request to associate with the failed page.
 * @return a new page bound to {@code request} and flagged as a failed download.
 * @since 1.0.2
 */
public static Page ofFailure(Request request) {
    return new Page(request, false);
}

/**
 * Constructs a {@link Page} without assigning a request or a
 * download-success flag; the constructor body is empty.
 */
public Page() {
}

/**
 * Creates a {@link Page} bound to a request, with its download
 * outcome recorded up front.
 *
 * @param request the request this page belongs to.
 * @param downloadSuccess {@code true} if the download succeeded, {@code false} otherwise.
 * @since 1.0.2
 */
private Page(Request request, boolean downloadSuccess) {
    // The two assignments are independent of each other.
    this.downloadSuccess = downloadSuccess;
    this.request = request;
}

/**
* Returns a {@link Page} with {@link #downloadSuccess} is {@code false}.
*
Expand All @@ -73,7 +108,9 @@ public static Page fail() {
* @param request the {@link Request}.
* @return the page.
* @since 0.10.0
* @deprecated Use {@link #ofFailure(Request)} instead.
*/
@Deprecated(since = "1.0.2", forRemoval = true)
public static Page fail(Request request){
Page page = new Page();
page.setRequest(request);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,14 @@ public Page download(Request request, Task task) {
CloseableHttpClient httpClient = getHttpClient(task.getSite());
Proxy proxy = proxyProvider != null ? proxyProvider.getProxy(request, task) : null;
HttpClientRequestContext requestContext = httpUriRequestConverter.convert(request, task.getSite(), proxy);
Page page = Page.fail(request);
Page page = null;
try {
httpResponse = httpClient.execute(requestContext.getHttpUriRequest(), requestContext.getHttpClientContext());
page = handleResponse(request, request.getCharset() != null ? request.getCharset() : task.getSite().getCharset(), httpResponse, task);
onSuccess(page, task);
return page;
} catch (IOException e) {
page = Page.ofFailure(request);
onError(page, task, e);
return page;
} finally {
Expand All @@ -105,7 +106,7 @@ protected Page handleResponse(Request request, String charset, HttpResponse http
HttpEntity entity = httpResponse.getEntity();
byte[] bytes = entity != null ? IOUtils.toByteArray(entity.getContent()) : new byte[0];
String contentType = entity != null && entity.getContentType() != null ? entity.getContentType().getValue() : null;
Page page = new Page();
Page page = Page.ofSuccess(request);
page.setBytes(bytes);
if (!request.isBinaryContent()) {
if (charset == null) {
Expand All @@ -117,7 +118,6 @@ protected Page handleResponse(Request request, String charset, HttpResponse http
page.setUrl(new PlainText(request.getUrl()));
page.setRequest(request);
page.setStatusCode(httpResponse.getStatusLine().getStatusCode());
page.setDownloadSuccess(true);
if (responseHeader) {
page.setHeaders(HttpClientUtils.convertHeaders(httpResponse.getAllHeaders()));
}
Expand Down
2 changes: 1 addition & 1 deletion webmagic-coverage/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
<parent>
<groupId>us.codecraft</groupId>
<artifactId>webmagic</artifactId>
<version>1.0.1</version>
<version>1.0.2</version>
</parent>

<artifactId>webmagic-coverage</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion webmagic-extension/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
<parent>
<groupId>us.codecraft</groupId>
<artifactId>webmagic</artifactId>
<version>1.0.1</version>
<version>1.0.2</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
2 changes: 1 addition & 1 deletion webmagic-samples/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
<parent>
<groupId>us.codecraft</groupId>
<artifactId>webmagic</artifactId>
<version>1.0.1</version>
<version>1.0.2</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
2 changes: 1 addition & 1 deletion webmagic-saxon/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
<parent>
<groupId>us.codecraft</groupId>
<artifactId>webmagic</artifactId>
<version>1.0.1</version>
<version>1.0.2</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
2 changes: 1 addition & 1 deletion webmagic-scripts/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
<parent>
<groupId>us.codecraft</groupId>
<artifactId>webmagic</artifactId>
<version>1.0.1</version>
<version>1.0.2</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
2 changes: 1 addition & 1 deletion webmagic-selenium/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
<parent>
<groupId>us.codecraft</groupId>
<artifactId>webmagic</artifactId>
<version>1.0.1</version>
<version>1.0.2</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down

0 comments on commit 837253c

Please sign in to comment.