Skip to content

Commit

Permalink
adding playwright and updating examples (#399)
Browse files Browse the repository at this point in the history
* adding playwright scraper

* adding-playwright

* updating-examples

* fixing-minor-issue
  • Loading branch information
Shyam-Raghuwanshi authored Jul 19, 2024
1 parent 814c491 commit 2a01739
Show file tree
Hide file tree
Showing 29 changed files with 576 additions and 138 deletions.
10 changes: 8 additions & 2 deletions JS/edgechains/arakoodev/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,14 @@
"./arakooserver": "./dist/arakooserver/src/index.js",
"./db": "./dist/db/src/index.js",
"./scraper": "./dist/scraper/src/index.js",
"./sync-rpc": "./dist/sync-rpc/export.js"
"./sync-rpc": "./dist/sync-rpc/export.js",
"./playwright": "./dist/playwright/src/index.js"
},
"scripts": {
"build": "rm -rf dist && tsc -b && cp -r src/sync-rpc dist/sync-rpc",
"lint": "eslint --ignore-path .eslintignore --ext .js,.ts",
"format": "prettier --ignore-path .gitignore --write \"**/*.+(js|ts|json)\"",
"test": "npx jest"
"test": "vitest"
},
"dependencies": {
"@babel/core": "^7.24.4",
Expand All @@ -30,22 +31,27 @@
"axios-retry": "^4.1.0",
"cheerio": "^1.0.0-rc.12",
"cors": "^2.8.5",
"document": "^0.4.7",
"dts-bundle-generator": "^9.3.1",
"esbuild": "^0.20.2",
"eventsource-parser": "^1.1.2",
"get-port": "^7.1.0",
"hono": "3.9",
"jest-environment-jsdom": "^29.7.0",
"jsdom": "^24.1.0",
"node-fetch": "^3.3.2",
"node-html-parser": "^6.1.13",
"pdf-parse": "^1.1.1",
"pg": "^8.11.5",
"playwright": "^1.45.1",
"prettier": "^3.2.5",
"regenerator-runtime": "^0.14.1",
"request": "^2.88.2",
"retry": "^0.13.1",
"text-encoding": "^0.7.0",
"ts-node": "^10.9.2",
"typeorm": "^0.3.20",
"vitest": "^2.0.3",
"web-streams-polyfill": "^4.0.0",
"youtube-transcript": "^1.2.1",
"zod": "^3.23.8",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { ArakooServer } from "../../../../../dist/arakooserver/src/lib/hono/hono.js";
import { Hono } from "hono";

import { describe, expect, it } from 'vitest'
describe("ArakooServer", () => {
let arakooServer = new ArakooServer();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ export class OpenAI {
.post(
openAI_url,
{
model: chatOptions.model || "gpt-3.5-turbo",
model: chatOptions.model || "gpt-3.5-turbo-16k",
messages: [
{
role: chatOptions.role || "user",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import axios from "axios";
import { ChatOpenAi } from "../../../../dist/openai/src/lib/endpoints/OpenAiEndpoint.js";
import { OpenAI } from "../../../../dist/openai/src/lib/endpoints/OpenAiEndpoint.js";

jest.mock("axios");

Expand All @@ -15,8 +15,8 @@ describe("ChatOpenAi", () => {
];

axios.post = jest.fn().mockResolvedValueOnce({ data: { choices: mockResponse } });
const chatOpenAi = new ChatOpenAi({ openAIApiKey: "test_api_key" });
const response = await chatOpenAi.generateResponse("test prompt");
const chatOpenAi = new OpenAI({ apiKey: "test_api_key" });
const response = await chatOpenAi.chat({prompt:"test prompt"});
expect(response).toEqual("Test response");
});
});
Expand All @@ -25,7 +25,7 @@ describe("ChatOpenAi", () => {
test("should generate embeddings from OpenAI", async () => {
const mockResponse = { embeddings: "Test embeddings" };
axios.post = jest.fn().mockResolvedValue({ data: { data: { choices: mockResponse } } });
const chatOpenAi = new ChatOpenAi({ openAIApiKey: "test_api_key" });
const chatOpenAi = new OpenAI({ apiKey: "test_api_key" });
const res = await chatOpenAi.generateEmbeddings("test prompt");
expect(res.choices.embeddings).toEqual("Test embeddings");
});
Expand All @@ -46,7 +46,7 @@ describe("ChatOpenAi", () => {
},
];
axios.post = jest.fn().mockResolvedValueOnce({ data: { choices: mockResponse } });
const chatOpenAi = new ChatOpenAi({ openAIApiKey: "test_api_key" });
const chatOpenAi = new OpenAI({ apiKey: "test_api_key" });
const chatMessages = [
{
role: "user",
Expand All @@ -57,7 +57,8 @@ describe("ChatOpenAi", () => {
content: "message 2",
},
];
const responses = await chatOpenAi.chatWithAI(chatMessages);
//@ts-ignore
const responses = await chatOpenAi.chat({messages:chatMessages});
expect(responses).toEqual(mockResponse);
});
});
Expand All @@ -72,8 +73,8 @@ describe("ChatOpenAi", () => {
},
];
axios.post = jest.fn().mockResolvedValueOnce({ data: { choices: mockResponse } });
const chatOpenAi = new ChatOpenAi({ openAIApiKey: "test_api_key" });
const response = await chatOpenAi.testResponseGeneration("test prompt");
const chatOpenAi = new OpenAI({ apiKey: "test_api_key" });
const response = await chatOpenAi.chat({prompt:"test prompt"});
expect(response).toEqual("Test response");
});
});
Expand Down
4 changes: 3 additions & 1 deletion JS/edgechains/arakoodev/src/scraper/src/index.ts
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
export { WebScraper } from "./lib/webScraper";
export { Cheerio } from "./lib/cheerio";
export { AutoPlayWriteWebPageScrapper } from "./lib/autoPlaywrightPageScrapper";
export { Playwright } from "./lib/playwright";
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@

import { chromium } from "playwright";

/**
 * Scrapes the visible text of a web page by rendering it in headless
 * Chromium through Playwright.
 */
export class AutoPlayWriteWebPageScrapper {
    constructor() {}

    /**
     * Navigates to `url`, waits for `domcontentloaded`, and returns the inner
     * text of the `<html>` element flattened for downstream use.
     *
     * Newline characters are removed outright (not replaced by a space), then
     * every remaining run of two or more whitespace characters is collapsed
     * to a single space.
     *
     * @param url - Address of the page to load.
     * @returns The flattened text content of the page.
     * @throws Re-throws any error raised while opening the page, navigating,
     *         or reading the text; the browser is closed before it propagates.
     */
    async getContent(url: string): Promise<string> {
        const browser = await chromium.launch({
            headless: true,
        });
        try {
            const page = await browser.newPage();
            await page.goto(url, {
                waitUntil: "domcontentloaded",
            });
            const textContent = await page.innerText("html");
            return textContent.replace(/\n/g, "").replace(/\s{2,}/g, " ");
        } finally {
            // Always release the Chromium process, even when navigation or
            // text extraction fails; otherwise every failed call leaks a browser.
            await browser.close();
        }
    }
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import axios from "axios";
import cheerio from "cheerio";

export class WebScraper {
export class Cheerio {
constructor() {}
async getContent(url: string): Promise<string> {
const content = await axios(url);
Expand Down
204 changes: 204 additions & 0 deletions JS/edgechains/arakoodev/src/scraper/src/lib/playwright.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
import { chromium } from "playwright"
import axios from "axios";
import {
parseArr,
parseSite,
preprocessJsonInput,
} from '../utils/index';
import retry from "retry";
import { removeBlankTags } from "../utils/page-parser";

/**
 * Drives a Chromium browser from a natural-language task description.
 *
 * The task is first split into individual actions by an OpenAI chat model;
 * for each action the model is then asked to write Playwright code against
 * the current page, and that code is executed with the live `page` object.
 */
export class Playwright {
    constructor() {}

    /**
     * Builds the code-generation prompt for a single action.
     *
     * The prompt embeds the current page URL, the page title, and the first
     * 25000 characters of the site overview produced by `parseSite` after
     * `removeBlankTags` has been applied to it.
     *
     * @param task - One action description to turn into Playwright code.
     * @param page - Page object; `evaluate` is called on it and it is passed to `parseSite`.
     * @returns The full prompt text.
     */
    async #createPrompt({ task, page }: { task: string, page: any }) {
        return `
You are a Senior SDET tasked with writing Playwright code for testing purposes. Your role involves implementing specific task-based code segments within a larger test file, following the instructions provided closely. Assume that common imports like 'test' and 'expect' from '@playwright/test' are already at the top of the file.
Context:
- Your computer is a Mac. Cmd is the meta key, META.
- The browser is already open.
- Current page URL: ${await page.evaluate('location.href')}.
- Current page title: ${await page.evaluate('document.title')}.
- Overview of the site in HTML format:
\\\
${removeBlankTags((await parseSite(page))).slice(0, 25000)}
\\\
Key Points:
- Start directly with Playwright actions as described in the user task, without adding extraneous steps or assertions.
- Include assertions like 'expect' statements or wait functions such as 'waitForLoadState' only when they are specifically requested in the user task.
- Minimal, relevant comments should be used to clarify complex actions or essential aspects of the test's purpose.
- Apply 'frameLocator' for content in nested iframes, as needed based on the task requirements.
- Store the output in a variable and Return the output not log that
User Task: [Insert the specific user task here, including any detailed instructions related to the execution, waiting for specific conditions, or explicit requests for assertions and waits.]
Expected Code Format:
\\\
// [Insert Playwright code based on the task description. Begin with necessary actions directly, and include 'waitForLoadState', assertions, or 'expect' statements only if explicitly requested in the task. Comments should be concise and pertinent, especially for complex actions or decisions.]
\\\
The objective is to create Playwright code that is efficient, precise, and perfectly aligned with the task's requirements, integrating seamlessly into the larger test file. All actions and comments should be relevant and necessary, catering to a senior-level professional's understanding of the testing scenario.
HumanMessage Write Playwright code for this: ${task}
Examples:
go to hacker news - await page.goto('https://news.ycombinator.com/')
click on the first link - page.click('a[href="https://blog.sbensu.com/posts/demand-for-visual-programming/"]')
give me all the text of this page - await page.waitForLoadState('networkidle')
Some Playwright Actions that should use for you reference:
- await page.goto('https://github.com/login');
- await page.getByLabel('Username or email address').fill('username');
- await page.getByLabel('Password').fill('password');
- await page.getByRole('button', { name: 'Sign in' }).click();
- await page.innerText('html')
- page.getByRole('button', { name: 'submit' });
- page.getByRole('listitem').filter({ hasText: 'Product 2' });
- await page.getByRole('listitem').filter({ hasText: 'Product 2' }).getByRole('button', { name: 'Add to cart' }).click();
- page.locator('button.buttonIcon.episode-actions-later');
- await expect(page.getByText('welcome')).toBeVisible();
- await expect(page.getByText('welcome')).toBeVisible();
- await page.innerText(selector);
- await page.innerText(selector, options);
- const page = await browser.newPage();
- await page.goto('https://keycode.info');
- await page.press('body', 'A');
- await page.screenshot({ path: 'A.png' });
- await page.press('body', 'ArrowLeft');
- await page.screenshot({ path: 'ArrowLeft.png' });
- await page.press('body', 'Shift+O');
- await page.screenshot({ path: 'O.png' });
- await browser.close();
// click on the links, example
- await page.click('a[href="https://blog.sbensu.com/posts/demand-for-visual-programming/"]');
`;
    }

    /**
     * Builds the prompt that asks the model to split a free-form task into
     * an array of individual action strings.
     *
     * @param task - The user's full task description.
     * @returns The full prompt text.
     */
    #createPromptForTaskArr(task: string) {
        return `
Given the following task description:
${task}
Extract the key actions from this task and return them as an array of strings. Each action should be a separate string in the array. If the task description contains syntax errors or you think a command can be improved for better clarity and effectiveness, please make the necessary corrections and improvements. For example:
Input:
"Go to Hacker News and click on the first link. Then give me all the text of this page."
Output:
\`\`\`
[
"Navigate to the Hacker News website by entering the URL 'https://news.ycombinator.com/' in the browser",
"Identify and click on the first link displayed on the Hacker News homepage",
"Extract and return all the text content from the page"
]
Ensure that each action is specific, clear, and comprehensive to facilitate precise implementation.
\`\`\`
`;
    }

    /**
     * Sends a single-message chat completion request to OpenAI, retrying
     * failed attempts with exponential backoff (up to 5 retries).
     *
     * @param chatApi - OpenAI API key, sent as a Bearer token.
     * @param prompt - Content of the single user message.
     * @returns The content of the first choice's message.
     * @throws Rejects with the last error once the retry budget is exhausted.
     */
    async #openAIRequest({ chatApi, prompt }: { chatApi: string, prompt: string }): Promise<string> {
        // The Promise constructor is needed here because `retry` is callback-driven.
        return new Promise<string>((resolve, reject) => {
            const operation = retry.operation({
                retries: 5,
                factor: 3,
                minTimeout: 1 * 1000,
                maxTimeout: 60 * 1000,
                randomize: true,
            });

            operation.attempt(async (currentAttempt) => {
                try {
                    const response = await axios.post(
                        "https://api.openai.com/v1/chat/completions",
                        {
                            model: "gpt-3.5-turbo-16k",
                            messages: [{ role: "user", content: prompt }],
                            max_tokens: 1000,
                            temperature: 0.7,
                        },
                        {
                            headers: {
                                Authorization: "Bearer " + chatApi,
                                "content-type": "application/json",
                            },
                        }
                    );
                    // A malformed payload (e.g. no choices) throws here and is
                    // handled below like any other failed attempt.
                    resolve(response.data.choices[0].message.content);
                } catch (error: any) {
                    if (error.response) {
                        console.log("Server responded with status code:", error.response.status);
                        console.log("Response data:", error.response.data);
                    } else if (error.request) {
                        console.log("No response received:", error);
                    } else {
                        console.log("Error creating request:", error.message, "\n", "Retrying ", currentAttempt);
                    }
                    // `retry` returns true when another attempt has been scheduled.
                    if (operation.retry(error)) {
                        return;
                    }
                    reject(error);
                }
            });
        });
    }

    /**
     * Runs a natural-language task in a Chromium browser.
     *
     * The task is split into actions by the model; actions are executed in
     * order until one of them returns a truthy value, which becomes the
     * result. A runtime error thrown by a generated snippet is logged and the
     * next action is tried.
     *
     * @param chatApi - OpenAI API key
     * @param task - Task description
     * @param url - URL to open first; defaults to https://www.google.com
     * @param headless - Run the browser in headless mode; defaults to true
     * @returns The first truthy value returned by the generated code, or an
     *          empty string when no action produced one.
     * @throws Propagates launch, navigation, OpenAI, and parsing failures, as
     *         well as syntax errors in the generated code; the browser is
     *         closed in every case.
     **/
    async call({ chatApi, task, url, headless = true }: { chatApi: string, task: string, url?: string, headless?: boolean }): Promise<string> {
        // There is no global `AsyncFunction`; obtain its constructor from an instance.
        const AsyncFunction = async function () { }.constructor;

        const browser = await chromium.launch({
            headless: headless,
        });

        try {
            const page = await browser.newPage();
            await page.goto(url || "https://www.google.com");

            const taskPrompt = this.#createPromptForTaskArr(task);
            const taskArr: any = parseArr(await this.#openAIRequest({ chatApi, prompt: taskPrompt }));

            let response: string = "";

            // Stop at the first action that yields a truthy result.
            for (let i = 0; i < taskArr.length && !response; i++) {
                const prompt = await this.#createPrompt({ task: taskArr[i], page });
                const generatedCode: any = preprocessJsonInput(await this.#openAIRequest({ chatApi, prompt }));

                // SECURITY: this executes model-generated code in-process with
                // full access to the Node.js runtime. Only use with trusted
                // tasks/pages, or run inside a sandbox.
                const runStep = AsyncFunction("page", generatedCode);

                try {
                    const stepResult = await runStep(page);
                    if (stepResult) {
                        response = stepResult;
                    }
                } catch (error: unknown) {
                    // Best effort: a failing step is logged and the next one is tried.
                    console.log(error);
                }
            }

            return response;
        } finally {
            // Release the Chromium process on success and on every failure path.
            await browser.close();
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { AutoPlayWriteWebPageScrapper } from "../../../../dist/scraper/src/index.js";
import { describe, expect, it } from 'vitest'

// Integration check against the live Wikipedia page for "Akbar": the scraped
// text returned by AutoPlayWriteWebPageScrapper must mention the article subject.
describe("should scrape the page", async () => {
    it("should scrape the text and return", async () => {
        const targetUrl = "https://en.wikipedia.org/wiki/Akbar";
        const pageText = await new AutoPlayWriteWebPageScrapper().getContent(targetUrl);

        expect(String(pageText)).contains("Akbar");
    });
});
12 changes: 12 additions & 0 deletions JS/edgechains/arakoodev/src/scraper/src/tests/cheerio.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import { Cheerio } from "../../../../dist/scraper/src/index.js";
import { describe, expect, it } from 'vitest'
// Integration check against the live Wikipedia page for "Akbar": the content
// returned by the Cheerio scraper must mention the article subject.
describe("should scrape the page", async () => {
    it("should scrape the text and return", async () => {
        const targetUrl = "https://en.wikipedia.org/wiki/Akbar";
        const scraped = await new Cheerio().getContent(targetUrl);

        expect(String(scraped)).contains("Akbar");
    });
});
Loading

0 comments on commit 2a01739

Please sign in to comment.