Commit

Big update

Franri3008 committed Dec 19, 2024
1 parent 0fda8e9 commit cf607c4
Showing 2 changed files with 75 additions and 15 deletions.
40 changes: 33 additions & 7 deletions pages/Wheels/wheel2.html
@@ -22,15 +22,10 @@
.central-circle {
transition: fill 0.5s ease;
}
.circle-icon {
pointer-events: none;
filter: brightness(0) invert(1);
}
.full-group {
transition: transform 0.2s ease;
transform-origin: center center;
}
.interactive-circle {}
.line {
transition: stroke-width 0.2s ease, stroke 0.2s ease, stroke-dasharray 0.2s ease;
}
@@ -85,7 +80,8 @@
const mainSizeFactor = central.nameSizeFactor || 0.25;
const subSizeFactor = central.subnameSizeFactor || 2.0;
const centralCircleSize = central.centralCircleSize || 0.3;
const smallCircleSize = central.smallCircleSize || 0.175; // default equivalent of previous 0.05*3.5
const smallCircleSize = central.smallCircleSize || 0.175;
const iconTransform = (central.icon_transform === null || central.icon_transform === "None") ? null : central.icon_transform;

svg.innerHTML = '';
const topMargin = 110;
@@ -213,7 +209,35 @@
icon.setAttribute('y', y - iconSize / 2);
icon.setAttribute('width', iconSize);
icon.setAttribute('height', iconSize);
icon.setAttribute('class', 'circle-icon');

// Apply icon transform if specified
if (iconTransform) {
// Icons are normally rendered solid white via brightness(0) invert(1).
// If icon_transform is a hex color other than white, layer sepia/saturate/hue-rotate
// on top of the white base as a rough approximation. CSS filters cannot reproduce an
// arbitrary hex exactly; an exact match would require inlining and recoloring the SVG
// (see the sketch after this file's diff).
let filterStr = 'brightness(0) invert(1)';
if (iconTransform.toLowerCase() !== '#ffffff') {
// Rough approximation only: with hue-rotate(0deg) this tends toward a reddish tone.
filterStr += ' sepia(1) saturate(10000%) hue-rotate(0deg)';
}
icon.setAttribute('style', `filter: ${filterStr};`);
} else {
// No transform: leave the icon unfiltered.
icon.removeAttribute('style');
}

circleGroup.appendChild(icon);

const textOffset = 80;
@@ -301,6 +325,7 @@
const translateY = -dy * (scaleFactor - 1);
fullGroup.setAttribute('transform', `translate(${translateX}, ${translateY}) scale(${scaleFactor})`);
centralCircle.setAttribute('fill', circleColor);
centralCircle.setAttribute('stroke', 'none'); // remove black outline on hover
centralCircle.setAttribute('style', `filter: drop-shadow(0 0 20px ${circleColor});`);
const hoverStrokeWidth = originalStrokeWidth * 20;
circleGroup.lineElement.setAttribute('stroke-width', hoverStrokeWidth.toString());
@@ -321,6 +346,7 @@
circleGroup.addEventListener('mouseleave', () => {
fullGroup.setAttribute('transform', '');
centralCircle.setAttribute('fill', '#FFFFFF');
centralCircle.setAttribute('stroke', '#000000'); // restore outline on mouse leave
centralCircle.removeAttribute('style');
circleGroup.lineElement.setAttribute('stroke-width', originalStrokeWidth.toString());
circleGroup.lineElement.setAttribute('stroke', '#000000');
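Note on the colorization above: the sepia/saturate/hue-rotate chain only approximates a target hex, as the in-code comments say. Below is a minimal sketch of the exact-color alternative those comments mention (inline SVG manipulation). It assumes the icon URL serves raw SVG markup from a same-origin or CORS-enabled host (true of the huggingface.co logo URLs in wheel2.json, not of raster .png avatars); the helper name appendTintedSvgIcon is illustrative and not part of this commit.

// Sketch only: recolor a monochrome SVG icon exactly by inlining its markup.
async function appendTintedSvgIcon(group, href, x, y, size, hexColor) {
  const svgText = await (await fetch(href)).text();
  const parsed = new DOMParser().parseFromString(svgText, 'image/svg+xml');
  // Adopt the fetched <svg> root into the current document before appending it.
  const inlineIcon = document.importNode(parsed.documentElement, true);
  inlineIcon.setAttribute('x', x - size / 2);
  inlineIcon.setAttribute('y', y - size / 2);
  inlineIcon.setAttribute('width', size);
  inlineIcon.setAttribute('height', size);
  // Override the fill of every drawable shape with the requested color.
  inlineIcon.querySelectorAll('path, circle, rect, polygon, ellipse')
    .forEach(el => el.setAttribute('fill', hexColor));
  group.appendChild(inlineIcon);
}

Called with the values already computed in the loop (for example appendTintedSvgIcon(circleGroup, item.icon, x, y, iconSize, iconTransform)), it would replace the <image> element rather than filtering it.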
50 changes: 42 additions & 8 deletions pages/Wheels/wheel2.json
@@ -5,16 +5,17 @@
"color": "#111111",
"nameSizeFactor": 0.20,
"subnameSizeFactor": 1.65,
"centralCircleSize": 0.3,
"smallCircleSize": 0.175
"centralCircleSize": 0.40,
"smallCircleSize": 0.135,
"icon_transform": "None"
},
"items": [
{
"name": "FineWeb",
"subname": "HuggingFaceFW",
"bullets": ["Cleaned and deduplicated english web data from CommonCrawl", "93.4 TBs"],
"description": "Contains 15T-tokens of cleaned and deduplicated english web data from CommonCrawl. Curated for large-scale LLM training. Models trained on this data show superiority over models trained on other datasets like C4, Dolma, and RedPajama.\nEstimated number of rows: 45,995,362,478.\nSize of auto-converted Parquet files: 93.4 TB. Key feature: As of today, the largest publicly available, high-quality web dataset.",
"icon": "https://drive.google.com/file/d/14NfJZIUWspK80MKoAfLbekNcIkL4Xnjm/view?usp=drive_link",
"icon": "https://huggingface.co/front/assets/huggingface_logo-noborder.svg",
"url": "https://huggingface.co/datasets/HuggingFaceFW/fineweb",
"color": "#FFD21E",
"x": 0.0,
@@ -25,7 +26,7 @@
"subname": "Microsoft",
"bullets": ["Designed to train models for instruction-following tasks", "Prompts and responses are synthetically generated by AgentInstruct"],
"description": "Designed to train models for instruction-following tasks like text creative writing, coding or reading comprehension. Both the prompts and the responses of this dataset are synthetically generated by AgentInstruct, using only raw text content publicly avialble on the Web as seeds.\nNumber of rows: 1,046,410\nSize of auto-converted Parquet files: 2.21 GB",
"icon": "https://drive.google.com/file/d/1m-jM0n5aA57FbmRa7RLuXJhOC-QpDpun/view?usp=drive_link",
"icon": "https://cdn-avatars.huggingface.co/v1/production/uploads/1583646260758-5e64858c87403103f9f1055d.png",
"url": "https://huggingface.co/datasets/microsoft/orca-agentinstruct-1M-v1",
"color": "#5086BC",
"x": 0.0,
@@ -36,7 +37,7 @@
"subname": "Neuralwork",
"bullets": ["Curated for question-answering tasks", "Data is converted to highly readable (.mmd) format"],
"description": "The largest open and permissible licensed text dataset, comprising over 2 trillion tokens (2,003,039,184,047 tokens). Contains a diverse set of sources such as books, newspapers, scientific articles, government and legal documents, code, and more.\nEstimated number of rows: 396,953,971\nSize of auto-converted Parquet files (First 5GB): 2.96 GB\nKey feature: Data is permissively licensed, meaning it can be used, modified, and redistributed without legal ambiguity or risk of infringement.",
"icon": "https://drive.google.com/file/d/10OJ348DMNH3nd8yJBqyP44unwQPL-x9a/view?usp=drive_link",
"icon": "https://cdn-avatars.huggingface.co/v1/production/uploads/6329b0cabdb6242b42b8cd63/7T7rS_-BL7wLWMDiCZgs7.png",
"url": "https://huggingface.co/datasets/neuralwork/arxiver",
"color": "#2FAD3B",
"x": 0.0,
@@ -58,7 +59,7 @@
"subname": "HuggingFaceTB",
"bullets": ["Designed for supervised finetuning (SFT) of LLMs", "Curated to strengthen model capabilities such as mathematics and coding"],
"description": "Synthetic dataset designed for supervised finetuning (SFT) of LLMs. It focuses on bridging the performance gap between models trained on SFT datasets and those trained on proprietary instruction datasets.\nNumber of rows: 2,197,730\nSize of the auto-converted Parquet files: 4.15 GB\nKey feature: While curated for SFT, the dataset also aims at improving on instruction following tasks.",
"icon": "https://drive.google.com/file/d/1QhQuw1-tpEG9T0KzJjsiHQxxQqi9z9PZ/view?usp=drive_link",
"icon": "https://huggingface.co/front/assets/huggingface_logo-noborder.svg",
"url": "https://huggingface.co/datasets/HuggingFaceTB/smoltalk",
"color": "#ffd11e",
"x": 0.0,
@@ -113,7 +114,7 @@
"subname": "HuggingFaceFV",
"bullets": ["Curated to fine-tune AI for video understanding tasks", "Focuses on of mood analysis and storytelling in multimodal settings"],
"description": "Focuses on capturing the emotional journey and narrative flow of videos, thereby giving researchers the ingredients to cook up more context-savvy video analysis models.",
"icon": "https://drive.google.com/file/d/1PlYICoY1Zx0lL8PTcdrj26B9W87vKS0c/view?usp=drive_link",
"icon": "https://huggingface.co/front/assets/huggingface_logo-noborder.svg",
"url": "https://huggingface.co/datasets/HuggingFaceFV/finevideo",
"color": "#ffd11e",
"x": 0.0,
@@ -124,7 +125,7 @@
"subname": "HuggingFaceFW",
"bullets": ["Contains educational web page data filtered from FineWeb", "Uses an educational classifier to retain high-quality data"],
"description": "A 1.3T-token dataset of educational web pages filtered from FineWeb.\nNumber of rows: 3,004,505,493\nSize of the auto-converted Parquet files: 8.84 TB\nKey feature: The dataset uses an AI-trained educational classifier to retain only the most educational web pages.",
"icon": "https://drive.google.com/file/d/1jzAp9yMt7GJlsg5z47fCGelwwdduS_R0/view?usp=drive_link",
"icon": "https://huggingface.co/front/assets/huggingface_logo-noborder.svg",
"url": "https://huggingface.co/datasets/HuggingFaceFW/fineweb-edu",
"color": "#ffd11e",
"x": 0.0,
@@ -140,6 +141,39 @@
"color": "#48AB73",
"x": 0.0,
"y": -0.05
},
{
"name": "reasoning-base-20k",
"subname": "KingNish",
"bullets": ["Designed to train reasoning models", "Number of rows: 19,944"],
"description": "A dataset designed to train reasoning models to think through complex problems with detailed chains of thought and answers, for refined, human-like reasoning.\nNumber of rows: 19,944\nSize of the auto-converted Parquet files: 118 MB\nKey feature: Focus on complex reasoning problems presented in a Chain of Thought (CoT) format. ",
"icon": "https://drive.google.com/file/d/1XFQoBzYrZer31S3icqZdhDVttz9j9yvP/view?usp=drive_link",
"url": "https://huggingface.co/datasets/KingNish/reasoning-base-20k",
"color": "#EA4A7D",
"x": 0.0,
"y": -0.05
},
{
"name": "xlam-function-calling-60k",
"subname": "Salesforce",
"bullets": ["Verified data for function-calling applications", "95% correctness rate "],
"description": "Provides high-quality, verified data for function-calling applications, with a 95% correctness rate ensured through rigorous hierarchical verification.\nKey feature: Data is verified at three hierarchical levels: Format verification, actual function execution and semantic verification, ensuring its reliability and correctness.",
"icon": "https://drive.google.com/file/d/1atbEeWkO0ANarKopnwd6s3Uf1mggkQ3h/view?usp=drive_link",
"url": "https://huggingface.co/datasets/Salesforce/xlam-function-calling-60k",
"color": "#6182C4",
"x": 0.0,
"y": -0.05
},
{
"name": "FRAMES",
"subname": "Google",
"bullets": ["Designed to test capabilities of Retrieval-Augmented Generation", "Questions span diverse topics from history to science"],
"description": "A comprehensive evaluation dataset designed to test the capabilities of Retrieval-Augmented Generation (RAG) systems across factuality, retrieval accuracy, and reasoning.\nNumber of rows: 824\nSize of the auto-converted Parquet files: 236 kB\nKey feature:",
"icon": "https://cdn-avatars.huggingface.co/v1/production/uploads/5dd96eb166059660ed1ee413/WtA3YYitedOr9n02eHfJe.png",
"url": "https://huggingface.co/datasets/google/frames-benchmark",
"color": "#70A1D1",
"x": 0.0,
"y": -0.05
}
]
}
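Usage note, not part of this commit: the new "icon_transform" field pairs with the iconTransform logic added in wheel2.html. Leaving it as "None" (or null) keeps the default white filter on icons, while a hex string enables the approximate colorization. A purely illustrative value would be:

"icon_transform": "#FFD21E"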