Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

initial steam scrapper #236

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package io.mkrzywanski.pn.scrapper.contract

import io.mkrzywanski.gpn.scrapper.domain.post.*
import io.mkrzywanski.gpn.scrapper.domain.post.price.Currencies
import io.mkrzywanski.pn.scrapper.app.adapters.publishing.QueuePostPublisher
import io.mkrzywanski.pn.scrapper.app.infra.QueueConfig
import org.springframework.amqp.rabbit.annotation.RabbitListener
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package io.mkrzywanski.pn.scrapper.app.adapters.persistance;

import io.mkrzywanski.gpn.scrapper.domain.post.GameOffer;
import io.mkrzywanski.gpn.scrapper.domain.post.GamePrice;
import io.mkrzywanski.gpn.scrapper.domain.post.price.GamePrice;
import lombok.Value;

import java.util.UUID;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
package io.mkrzywanski.pn.scrapper.app.adapters.persistance;

import io.mkrzywanski.gpn.scrapper.domain.post.*;
import io.mkrzywanski.gpn.scrapper.domain.post.price.CompositeGamePrice;
import io.mkrzywanski.gpn.scrapper.domain.post.price.EmptyGamePrice;
import io.mkrzywanski.gpn.scrapper.domain.post.price.FreeGamePrice;
import io.mkrzywanski.gpn.scrapper.domain.post.price.GamePrice;

import java.util.Map;
import java.util.Optional;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
package io.mkrzywanski.pn.scrapper.app.adapters.persistance;

import io.mkrzywanski.gpn.scrapper.domain.post.CompositeGamePrice;
import io.mkrzywanski.gpn.scrapper.domain.post.EmptyGamePrice;
import io.mkrzywanski.gpn.scrapper.domain.post.FreeGamePrice;
import io.mkrzywanski.gpn.scrapper.domain.post.GamePrice;
import io.mkrzywanski.gpn.scrapper.domain.post.price.CompositeGamePrice;
import io.mkrzywanski.gpn.scrapper.domain.post.price.EmptyGamePrice;
import io.mkrzywanski.gpn.scrapper.domain.post.price.FreeGamePrice;
import io.mkrzywanski.gpn.scrapper.domain.post.price.GamePrice;
import io.mkrzywanski.gpn.scrapper.domain.post.NumberGamePrice;

import java.math.BigDecimal;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
// Third-party libraries used by this module. Every coordinate is written in
// the compact 'group:name:version' form.
dependencies {
    // HTML parsing
    implementation 'org.jsoup:jsoup:1.15.3'
    // JSON querying and binding
    implementation 'com.jayway.jsonpath:json-path:2.7.0'
    // HTTP stubbing for tests; the version is taken from the wiremockVersion property
    testImplementation "com.github.tomakehurst:wiremock:${wiremockVersion}"
    implementation 'com.fasterxml.jackson.core:jackson-databind:2.13.4.2'
    // General-purpose utilities
    implementation 'com.google.guava:guava:31.1-jre'
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
package io.mkrzywanski.gpn.scrapper.domain.gamehunter;

import io.mkrzywanski.gpn.scrapper.domain.post.*;
import io.mkrzywanski.gpn.scrapper.domain.post.NumberGamePrice;
import io.mkrzywanski.gpn.scrapper.domain.post.price.CompositeGamePrice;
import io.mkrzywanski.gpn.scrapper.domain.post.price.EmptyGamePrice;
import io.mkrzywanski.gpn.scrapper.domain.post.price.FreeGamePrice;
import io.mkrzywanski.gpn.scrapper.domain.post.price.GamePrice;

import java.util.Collection;
import java.util.HashSet;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,36 +48,36 @@ public void scrap() {
}
LOGGER.info("Scrapped page {} . Posts {}", pageNumber, scrappedPosts);
final Set<Hash> scrappedPostHashes = extractHashes(scrappedPosts);
final List<Hash> alreadySavedPosts = postRepository.findByHashIn(scrappedPostHashes);
final List<Hash> alreadySavedPostHashes = postRepository.findByHashIn(scrappedPostHashes);

newPostHashes = new HashSet<>(scrappedPostHashes);
alreadySavedPosts.forEach(newPostHashes::remove);
alreadySavedPostHashes.forEach(newPostHashes::remove);

LOGGER.info("New post hashes {}", newPostHashes);
LOGGER.info("Already saved post hashes {}", alreadySavedPosts);
LOGGER.info("Already saved post hashes {}", alreadySavedPostHashes);

final List<Post> newPostsFromCurrentPage = scrappedPosts.stream()
.filter(post -> !alreadySavedPosts.contains(post.getHash()))
.filter(post -> !alreadySavedPostHashes.contains(post.getHash()))
.toList();

LOGGER.info("New posts to be saved from page {} : {}", pageNumber, newPostsFromCurrentPage);
allNewPosts.addAll(newPostsFromCurrentPage);

final boolean currentPageIsPartiallyScrapped = alreadySavedPosts.size() > 0;
final boolean currentPageIsPartiallyScrapped = alreadySavedPostHashes.size() > 0;
if (currentPageIsPartiallyScrapped) {
break;
}

pageNumber++;

} while (true);
} while (!Thread.currentThread().isInterrupted());

final Set<Post> distinctByHash = allNewPosts.stream()
.filter(distinctByKey(Post::getHash))
.collect(Collectors.toSet());

postRepository.saveAll(distinctByHash);
postTransactionalOutboxRepository.put(new HashSet<>(distinctByHash));
postTransactionalOutboxRepository.put(distinctByHash);
}

private Set<Hash> extractHashes(final List<Post> scrappedPosts) {
Expand All @@ -95,7 +95,6 @@ private List<Post> scrapPage(final int pageNumber) {
}

/**
 * Builds a stateful predicate that accepts an element only the first time its extracted key is seen.
 *
 * @param keyExtractor derives the key that identifies an element
 * @param <T>          type of the tested elements
 * @return predicate returning {@code true} for the first element carrying a given key and
 *         {@code false} for every later element with the same key
 */
private static <T> Predicate<T> distinctByKey(final Function<? super T, ?> keyExtractor) {
    // Set.add reports whether the key was newly inserted, which is exactly the "first time seen" answer.
    final Set<Object> seenKeys = ConcurrentHashMap.newKeySet();
    return element -> seenKeys.add(keyExtractor.apply(element));
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package io.mkrzywanski.gpn.scrapper.domain.post;

import io.mkrzywanski.gpn.scrapper.domain.post.price.GamePrice;
import lombok.Value;

import java.util.UUID;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
package io.mkrzywanski.gpn.scrapper.domain.post;

import io.mkrzywanski.gpn.scrapper.domain.post.price.Currencies;
import io.mkrzywanski.gpn.scrapper.domain.post.price.GamePrice;

import java.math.BigDecimal;
import java.util.Currency;
import java.util.Map;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package io.mkrzywanski.gpn.scrapper.domain.post;
package io.mkrzywanski.gpn.scrapper.domain.post.price;

import java.math.BigDecimal;
import java.util.Collection;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package io.mkrzywanski.gpn.scrapper.domain.post;
package io.mkrzywanski.gpn.scrapper.domain.post.price;

import java.util.Currency;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package io.mkrzywanski.gpn.scrapper.domain.post;
package io.mkrzywanski.gpn.scrapper.domain.post.price;

import java.math.BigDecimal;
import java.util.Collections;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package io.mkrzywanski.gpn.scrapper.domain.post;
package io.mkrzywanski.gpn.scrapper.domain.post.price;

import java.math.BigDecimal;
import java.util.Collections;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package io.mkrzywanski.gpn.scrapper.domain.post;
package io.mkrzywanski.gpn.scrapper.domain.post.price;

import java.math.BigDecimal;
import java.util.Currency;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package io.mkrzywanski.gpn.scrapper.domain.steam;

/**
 * Couples a game's identity with the details obtained for it.
 *
 * @param gameId   identifier of the game
 * @param gameName name of the game
 * @param gameInfo details obtained for the game; a {@code null} value is treated as
 *                 "no usable details" by the query methods below
 */
record FullGameInfo(Integer gameId, String gameName, GameInfo gameInfo) {
    /**
     * @return {@code true} only when details are present and report success
     */
    boolean isSuccess() {
        // Guard against absent details instead of failing with a NullPointerException.
        return gameInfo != null && gameInfo.isSuccess();
    }

    /**
     * @return {@code true} only when details are present and report the game as on sale
     */
    boolean isOnSale() {
        return gameInfo != null && gameInfo.isOnSale();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package io.mkrzywanski.gpn.scrapper.domain.steam;

import com.fasterxml.jackson.annotation.JsonProperty;

/**
 * Payload of a game's details; currently only the pricing section is mapped.
 *
 * @param priceInfo pricing section, read from the {@code price_overview} JSON property
 */
record GameData(@JsonProperty("price_overview") PriceOverview priceInfo) {
    /**
     * Creates a placeholder instance whose price overview uses currency "PLN" and -1 for
     * every numeric component.
     *
     * @return a new placeholder {@code GameData}
     */
    static GameData empty() {
        final PriceOverview placeholderPrice = new PriceOverview("PLN", -1, -1, -1);
        return new GameData(placeholderPrice);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package io.mkrzywanski.gpn.scrapper.domain.steam;

/**
 * Result of looking up a single game: a success flag plus the game's data.
 *
 * @param success whether the lookup succeeded
 * @param data    the game's data; may be {@code null}
 */
record GameInfo(boolean success, GameData data) {
    /**
     * Tells whether the game is currently discounted.
     *
     * @return {@code true} when a price overview is available and its discount percent is
     *         greater than zero; {@code false} otherwise, including when the data or its
     *         price overview is missing
     */
    boolean isOnSale() {
        // Either level may be absent (e.g. no "price_overview" in the payload); that is not a sale.
        if (data == null || data.priceInfo() == null) {
            return false;
        }
        return data.priceInfo().discount_percent() > 0;
    }

    /**
     * @return the value of the {@code success} component
     */
    boolean isSuccess() {
        return this.success;
    }

    /**
     * Mutable builder for {@link GameInfo}. Unless overridden, the built instance is
     * unsuccessful and carries {@link GameData#empty()}.
     */
    static class Builder {
        private boolean success;
        private GameData gameData = GameData.empty();

        /**
         * Sets the success flag to the given value.
         *
         * @param success value to use for the success flag
         * @return this builder
         */
        Builder withSuccessValue(final boolean success) {
            this.success = success;
            return this;
        }

        /**
         * Marks the result as successful.
         *
         * @return this builder
         */
        Builder withSuccess() {
            this.success = true;
            return this;
        }

        /**
         * Marks the result as failed.
         *
         * @return this builder
         */
        Builder withFailure() {
            this.success = false;
            return this;
        }

        /**
         * Replaces the game data carried by the result.
         *
         * @param gameData data to carry
         * @return this builder
         */
        Builder withGameData(final GameData gameData) {
            this.gameData = gameData;
            return this;
        }

        /**
         * @return a new {@link GameInfo} reflecting the current builder state
         */
        GameInfo build() {
            return new GameInfo(success, gameData);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package io.mkrzywanski.gpn.scrapper.domain.steam;

import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.deser.std.StdDeserializer;

import java.io.IOException;

/**
 * Since the Steam API sometimes returns a JSON array in the "data" field instead of an object,
 * this deserializer is used to detect such cases and treat them as failures.
 */
class GameInfoDeserializer extends StdDeserializer<GameInfo> {
    protected GameInfoDeserializer() {
        super(GameInfo.class);
    }

    /**
     * Reads the "success" and "data" fields from the token stream and builds a {@link GameInfo}.
     *
     * <p>NOTE(review): the loop runs until the parser reports it is closed, so every remaining
     * token of the input is visited, not only the tokens of the current object. Confirm callers
     * rely on this before narrowing it.
     *
     * @param parser parser supplying the JSON tokens
     * @param ctxt   deserialization context (unused)
     * @return the {@link GameInfo} assembled from the fields that were seen
     * @throws IOException if reading from the parser fails
     */
    @Override
    public GameInfo deserialize(final JsonParser parser, final DeserializationContext ctxt) throws IOException {

        final var gameInfoBuilder = new GameInfo.Builder();
        while (!parser.isClosed()) {
            final JsonToken jsonToken = parser.nextToken();

            if (JsonToken.FIELD_NAME.equals(jsonToken)) {
                final String fieldName = parser.getCurrentName();

                // Advance from the field name to its value.
                parser.nextToken();

                if ("success".equals(fieldName)) {
                    gameInfoBuilder.withSuccessValue(parser.getBooleanValue());
                } else if ("data".equals(fieldName)) {
                    handleDataFieldDeserialization(parser, gameInfoBuilder);
                }
            }
        }
        return gameInfoBuilder.build();
    }

    /**
     * Handles the value of the "data" field: any non-array value is bound to {@link GameData};
     * an array marks the result as a failure.
     *
     * @param parser   parser positioned on the value of the "data" field
     * @param gameData builder receiving either the game data or the failure mark
     * @throws IOException if reading from the parser fails
     */
    private void handleDataFieldDeserialization(final JsonParser parser, final GameInfo.Builder gameData) throws IOException {
        final boolean isNotArrayStart = !JsonToken.START_ARRAY.equals(parser.currentToken());
        if (isNotArrayStart) {
            gameData.withGameData(parser.readValueAs(GameData.class));
        } else {
            gameData.withFailure();
            // Jump to the matching END_ARRAY so that nothing inside the unexpected array
            // (e.g. nested "success"/"data" names) is mistaken for a GameInfo field.
            parser.skipChildren();
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package io.mkrzywanski.gpn.scrapper.domain.steam;

import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.JsonDeserializer;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.fasterxml.jackson.databind.deser.std.StdDeserializer;

import java.io.IOException;

/**
 * Decorator that forwards deserialization to another deserializer and turns a
 * {@link JsonMappingException} raised by it into a {@code null} result.
 *
 * @param <T> type produced by the wrapped deserializer
 */
class JsonMappingErrorHandlingDeserializer<T> extends JsonDeserializer<T> {
    private final StdDeserializer<T> delegate;

    /**
     * @param delegate deserializer that performs the actual work
     */
    JsonMappingErrorHandlingDeserializer(final StdDeserializer<T> delegate) {
        this.delegate = delegate;
    }

    /**
     * Delegates to the wrapped deserializer.
     *
     * @param jp   parser supplying the JSON tokens
     * @param ctxt deserialization context, passed through unchanged
     * @return the delegate's result, or {@code null} when the delegate failed with a
     *         {@link JsonMappingException}
     * @throws IOException for any other I/O or parsing failure raised by the delegate
     */
    @Override
    public T deserialize(final JsonParser jp, final DeserializationContext ctxt) throws IOException {
        T mapped = null;
        try {
            mapped = delegate.deserialize(jp, ctxt);
        } catch (final JsonMappingException ignored) {
            // A mapping failure is deliberately not propagated: the value stays null so that
            // one unmappable element does not block the rest of the processing.
        }
        return mapped;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
package io.mkrzywanski.gpn.scrapper.domain.steam;

import com.fasterxml.jackson.annotation.JsonProperty;

/**
 * Pricing section of a game's data.
 *
 * @param currency         currency code of the prices
 * @param initial          price before any discount (NOTE(review): unit not visible here,
 *                         presumably minor currency units; confirm against the API)
 * @param discount_percent discount percentage; a value greater than zero means a discount applies
 * @param finalPrice       price after discount, read from the {@code final} JSON property
 *                         (renamed because {@code final} is a reserved word in Java)
 */
record PriceOverview(String currency, int initial, int discount_percent, @JsonProperty("final") int finalPrice) {
}
Loading