-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
39 changed files
with
3,131 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
package org.rx.crawler; | ||
|
||
import org.rx.core.FluentWait; | ||
import org.rx.core.Linq; | ||
import org.rx.core.Reflects; | ||
import org.rx.io.IOStream; | ||
|
||
import java.nio.charset.StandardCharsets; | ||
import java.util.concurrent.TimeoutException; | ||
|
||
public interface Browser extends AutoCloseable { | ||
String BLANK_URL = "about:blank", BODY_SELECTOR = "body"; | ||
|
||
static String readResourceJs(String resourcePath) { | ||
return IOStream.readString(Reflects.getResource(resourcePath), StandardCharsets.UTF_8); | ||
} | ||
|
||
BrowserType getType(); | ||
|
||
void setCookieRegion(String cookieRegion); | ||
|
||
long getWaitMillis(); | ||
|
||
String getCurrentUrl(); | ||
|
||
default FluentWait createWait(int timeoutSeconds) { | ||
return FluentWait.polling(timeoutSeconds * 1000L, getWaitMillis()); | ||
} | ||
|
||
default void navigateBlank() { | ||
nativeGet(BLANK_URL); | ||
} | ||
|
||
void navigateUrl(String url) throws TimeoutException; | ||
|
||
void navigateUrl(String url, String locatorSelector) throws TimeoutException; | ||
|
||
void navigateUrl(String url, String locatorSelector, int timeoutSeconds) throws TimeoutException; | ||
|
||
void nativeGet(String url); | ||
|
||
String saveCookies(boolean reset) throws TimeoutException; | ||
|
||
void clearCookies(boolean onlyBrowser); | ||
|
||
void setRawCookie(String rawCookie); | ||
|
||
String getRawCookie(); | ||
|
||
/** | ||
* 基本的selector,不能包含:eq(1)等 | ||
* | ||
* @param selector | ||
* @return | ||
*/ | ||
boolean hasElement(String selector); | ||
|
||
String elementText(String selector); | ||
|
||
Linq<String> elementsText(String selector); | ||
|
||
String elementVal(String selector); | ||
|
||
Linq<String> elementsVal(String selector); | ||
|
||
String elementAttr(String selector, String... attrArgs); | ||
|
||
Linq<String> elementsAttr(String selector, String... attrArgs); | ||
|
||
void elementClick(String selector); | ||
|
||
void elementClick(String selector, boolean waitElementLocated); | ||
|
||
void elementPress(String selector, String keys); | ||
|
||
void elementPress(String selector, String keys, boolean waitElementLocated); | ||
|
||
void waitElementLocated(String selector) throws TimeoutException; | ||
|
||
void waitElementLocated(String selector, int timeoutSeconds) throws TimeoutException; | ||
|
||
void injectScript(String script); | ||
|
||
//Boolean, Long, String, List, WebElement | ||
<T> T executeScript(String script, Object... args); | ||
|
||
<T> T injectAndExecuteScript(String injectScript, String script, Object... args); | ||
|
||
<T> T executeConfigureScript(String scriptName, Object... args); | ||
|
||
byte[] screenshotAsBytes(String selector); | ||
|
||
void focus(); | ||
|
||
void maximize(); | ||
|
||
void normalize(); | ||
} |
21 changes: 21 additions & 0 deletions
21
rxlib-x/src/main/java/org/rx/crawler/BrowserAsyncRequest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
package org.rx.crawler; | ||
|
||
import lombok.Getter; | ||
import lombok.NonNull; | ||
import lombok.RequiredArgsConstructor; | ||
|
||
import java.io.Serializable; | ||
import java.util.UUID; | ||
|
||
@Getter | ||
@RequiredArgsConstructor | ||
public class BrowserAsyncRequest implements Serializable, Comparable<BrowserAsyncRequest> { | ||
private final UUID asyncId; | ||
private final int priority; | ||
private final String url; | ||
|
||
@Override | ||
public int compareTo(@NonNull BrowserAsyncRequest o) { | ||
return Integer.compare(priority, o.priority); | ||
} | ||
} |
14 changes: 14 additions & 0 deletions
14
rxlib-x/src/main/java/org/rx/crawler/BrowserAsyncResponse.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
package org.rx.crawler; | ||
|
||
import lombok.Getter; | ||
import lombok.RequiredArgsConstructor; | ||
|
||
import java.io.Serializable; | ||
import java.net.InetSocketAddress; | ||
|
||
@Getter | ||
@RequiredArgsConstructor | ||
public class BrowserAsyncResponse implements Serializable { | ||
private final BrowserAsyncRequest request; | ||
private final InetSocketAddress endpoint; | ||
} |
159 changes: 159 additions & 0 deletions
159
rxlib-x/src/main/java/org/rx/crawler/BrowserAsyncTopic.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
package org.rx.crawler; | ||
|
||
import lombok.Getter; | ||
import lombok.NonNull; | ||
import lombok.RequiredArgsConstructor; | ||
import lombok.extern.slf4j.Slf4j; | ||
import org.redisson.api.RPriorityBlockingQueue; | ||
import org.redisson.api.RSetCache; | ||
import org.redisson.api.RTopic; | ||
import org.rx.core.Constants; | ||
import org.rx.core.ResetEventWait; | ||
import org.rx.exception.TraceHandler; | ||
import org.rx.redis.RedisCache; | ||
import org.rx.util.function.TripleAction; | ||
import org.rx.util.function.TripleFunc; | ||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; | ||
import org.springframework.stereotype.Component; | ||
|
||
import javax.annotation.PostConstruct; | ||
import java.util.List; | ||
import java.util.UUID; | ||
import java.util.concurrent.*; | ||
|
||
import static org.rx.core.Sys.toJsonString; | ||
|
||
@RequiredArgsConstructor | ||
@Component | ||
@ConditionalOnProperty(name = org.rx.spring.BeanRegister.REDIS_PROP_NAME) | ||
@Slf4j | ||
public class BrowserAsyncTopic { | ||
@RequiredArgsConstructor | ||
private class AsyncFuture<T> implements Future<T> { | ||
private final UUID asyncId; | ||
private final Object callback; | ||
private final ResetEventWait waiter = new ResetEventWait(); | ||
@Getter | ||
private volatile boolean done; | ||
private volatile Throwable exception; | ||
private T result; | ||
|
||
@Override | ||
public boolean cancel(boolean mayInterruptIfRunning) { | ||
return callbacks.remove(asyncId) != null; | ||
} | ||
|
||
@Override | ||
public boolean isCancelled() { | ||
return !callbacks.containsKey(asyncId); | ||
} | ||
|
||
@Override | ||
public T get() throws ExecutionException { | ||
try { | ||
return get(Constants.TIMEOUT_INFINITE, TimeUnit.MILLISECONDS); | ||
} catch (TimeoutException e) { | ||
log.warn("ignore", e); | ||
} | ||
return null; | ||
} | ||
|
||
@Override | ||
public T get(long timeout, TimeUnit unit) throws ExecutionException, TimeoutException { | ||
if (!waiter.waitOne(TimeUnit.MILLISECONDS.convert(timeout, unit))) { | ||
throw new TimeoutException(); | ||
} | ||
if (exception != null) { | ||
throw new ExecutionException(exception); | ||
} | ||
return result; | ||
} | ||
} | ||
|
||
//避免topic多次listen | ||
public static final String QUEUE_NAME = "BAsyncQueue", TOPIC_NAME = "BAsyncTopic", IN_PUBLISH_NAME = "BAsyncPublish"; | ||
private final RedisCache<?, ?> redisCache; | ||
private RPriorityBlockingQueue<BrowserAsyncRequest> queue; | ||
private RTopic topic; | ||
private RSetCache<Integer> publishSet; | ||
private final ConcurrentHashMap<UUID, AsyncFuture> callbacks = new ConcurrentHashMap<>(); | ||
|
||
@PostConstruct | ||
public void init() { | ||
queue = redisCache.getClient().getPriorityBlockingQueue(QUEUE_NAME); | ||
topic = redisCache.getClient().getTopic(TOPIC_NAME); | ||
publishSet = redisCache.getClient().getSetCache(IN_PUBLISH_NAME); | ||
// require(queue, queue.trySetComparator(Comparator.comparingInt(BrowserAsyncRequest::getPriority))); | ||
topic.addListener(BrowserAsyncResponse.class, (channel, asyncResponse) -> { | ||
log.info("Async consume response {}", toJsonString(asyncResponse)); | ||
try { | ||
AsyncFuture future = callbacks.get(asyncResponse.getRequest().getAsyncId()); | ||
if (future == null || future.isCancelled() || future.isDone()) { | ||
return; | ||
} | ||
try (RemoteBrowser browser = RemoteBrowser.wrap(asyncResponse.getEndpoint())) { | ||
if (future.isCancelled()) { | ||
return; | ||
} | ||
if (future.callback instanceof TripleFunc) { | ||
future.result = ((TripleFunc<RemoteBrowser, String, Object>) future.callback).invoke(browser, asyncResponse.getRequest().getUrl()); | ||
return; | ||
} | ||
((TripleAction<RemoteBrowser, String>) future.callback).invoke(browser, asyncResponse.getRequest().getUrl()); | ||
} catch (Throwable e) { | ||
TraceHandler.INSTANCE.log("Async {} error", future.asyncId, e); | ||
future.exception = e; | ||
} finally { | ||
callbacks.remove(future.asyncId); | ||
future.done = true; | ||
future.waiter.set(); | ||
} | ||
} finally { | ||
publishSet.remove(asyncResponse.getEndpoint().getPort()); | ||
} | ||
}); | ||
log.info("register BrowserAsyncTopic ok"); | ||
} | ||
|
||
//region Consume | ||
public void add(@NonNull BrowserAsyncRequest request) { | ||
queue.add(request); | ||
} | ||
|
||
public Future listen(UUID asyncId, TripleAction<RemoteBrowser, String> callback) { | ||
AsyncFuture future = new AsyncFuture(asyncId, callback); | ||
callbacks.put(asyncId, future); | ||
return future; | ||
} | ||
|
||
public <T> Future<T> listen(UUID asyncId, TripleFunc<RemoteBrowser, String, T> callback) { | ||
AsyncFuture<T> future = new AsyncFuture<>(asyncId, callback); | ||
callbacks.put(asyncId, future); | ||
return future; | ||
} | ||
//endregion | ||
|
||
//region produce | ||
public List<BrowserAsyncRequest> poll(int takeCount) { | ||
return queue.poll(takeCount); | ||
} | ||
|
||
public BrowserAsyncRequest poll() { | ||
return queue.poll(); | ||
} | ||
|
||
public boolean isPublishing(int nextIdleId) { | ||
return publishSet.contains(nextIdleId); | ||
} | ||
|
||
public void publish(BrowserAsyncResponse response) { | ||
if (response == null || response.getRequest() == null || response.getRequest().getAsyncId() == null || response.getEndpoint() == null) { | ||
log.warn("Async publish invalid response {}", toJsonString(response)); | ||
return; | ||
} | ||
|
||
publishSet.add(response.getEndpoint().getPort(), 6, TimeUnit.SECONDS); | ||
topic.publish(response); | ||
} | ||
//endregion | ||
} |
5 changes: 5 additions & 0 deletions
5
rxlib-x/src/main/java/org/rx/crawler/BrowserPoolListener.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
package org.rx.crawler; | ||
|
||
public interface BrowserPoolListener extends AutoCloseable { | ||
int nextIdleId(BrowserType type); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
package org.rx.crawler; | ||
|
||
import lombok.Getter; | ||
import lombok.RequiredArgsConstructor; | ||
|
||
@RequiredArgsConstructor | ||
@Getter | ||
public enum BrowserType { | ||
CHROME("chrome.exe", "chromedriver.exe"), | ||
IE("iexplore.exe", "IEDriverServer.exe"); | ||
|
||
private final String processName; | ||
private final String driverName; | ||
} |
Oops, something went wrong.