Commit

Big update

Franri3008 committed Dec 19, 2024
1 parent 0fda8e9 commit cf607c4
Showing 2 changed files with 75 additions and 15 deletions.
40 changes: 33 additions & 7 deletions pages/Wheels/wheel2.html
@@ -22,15 +22,10 @@
.central-circle {
transition: fill 0.5s ease;
}
.circle-icon {
pointer-events: none;
filter: brightness(0) invert(1);
}
.full-group {
transition: transform 0.2s ease;
transform-origin: center center;
}
.interactive-circle {}
.line {
transition: stroke-width 0.2s ease, stroke 0.2s ease, stroke-dasharray 0.2s ease;
}
@@ -85,7 +80,8 @@
const mainSizeFactor = central.nameSizeFactor || 0.25;
const subSizeFactor = central.subnameSizeFactor || 2.0;
const centralCircleSize = central.centralCircleSize || 0.3;
const smallCircleSize = central.smallCircleSize || 0.175; // default equivalent of previous 0.05*3.5
const smallCircleSize = central.smallCircleSize || 0.175;
const iconTransform = (central.icon_transform === null || central.icon_transform === "None") ? null : central.icon_transform;

svg.innerHTML = '';
const topMargin = 110;
@@ -213,7 +209,35 @@
icon.setAttribute('y', y - iconSize / 2);
icon.setAttribute('width', iconSize);
icon.setAttribute('height', iconSize);
icon.setAttribute('class', 'circle-icon');

// Apply icon transform if specified
if (iconTransform) {
// Icons are normally rendered solid white via brightness(0) invert(1).
// If icon_transform is a hex color other than white, layer sepia/saturate/hue-rotate
// on top of the white base as a rough approximation. CSS filters cannot reproduce an
// arbitrary hex exactly; an exact match would require inlining and recoloring the SVG
// (see the sketch after this file's diff).
let filterStr = 'brightness(0) invert(1)';
if (iconTransform.toLowerCase() !== '#ffffff') {
// Rough approximation only: with hue-rotate(0deg) this tends toward a reddish tone.
filterStr += ' sepia(1) saturate(10000%) hue-rotate(0deg)';
}
icon.setAttribute('style', `filter: ${filterStr};`);
} else {
// No transform: leave the icon unfiltered.
icon.removeAttribute('style');
}

circleGroup.appendChild(icon);

const textOffset = 80;
@@ -301,6 +325,7 @@
const translateY = -dy * (scaleFactor - 1);
fullGroup.setAttribute('transform', `translate(${translateX}, ${translateY}) scale(${scaleFactor})`);
centralCircle.setAttribute('fill', circleColor);
centralCircle.setAttribute('stroke', 'none'); // remove black outline on hover
centralCircle.setAttribute('style', `filter: drop-shadow(0 0 20px ${circleColor});`);
const hoverStrokeWidth = originalStrokeWidth * 20;
circleGroup.lineElement.setAttribute('stroke-width', hoverStrokeWidth.toString());
@@ -321,6 +346,7 @@
circleGroup.addEventListener('mouseleave', () => {
fullGroup.setAttribute('transform', '');
centralCircle.setAttribute('fill', '#FFFFFF');
centralCircle.setAttribute('stroke', '#000000'); // restore outline on mouse leave
centralCircle.removeAttribute('style');
circleGroup.lineElement.setAttribute('stroke-width', originalStrokeWidth.toString());
circleGroup.lineElement.setAttribute('stroke', '#000000');
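Note on the colorization above: the sepia/saturate/hue-rotate chain only approximates a target hex, as the in-code comments say. Below is a minimal sketch of the exact-color alternative those comments mention (inline SVG manipulation). It assumes the icon URL serves raw SVG markup from a same-origin or CORS-enabled host (true of the huggingface.co logo URLs in wheel2.json, not of raster .png avatars); the helper name appendTintedSvgIcon is illustrative and not part of this commit.

// Sketch only: recolor a monochrome SVG icon exactly by inlining its markup.
async function appendTintedSvgIcon(group, href, x, y, size, hexColor) {
  const svgText = await (await fetch(href)).text();
  const parsed = new DOMParser().parseFromString(svgText, 'image/svg+xml');
  // Adopt the fetched <svg> root into the current document before appending it.
  const inlineIcon = document.importNode(parsed.documentElement, true);
  inlineIcon.setAttribute('x', x - size / 2);
  inlineIcon.setAttribute('y', y - size / 2);
  inlineIcon.setAttribute('width', size);
  inlineIcon.setAttribute('height', size);
  // Override the fill of every drawable shape with the requested color.
  inlineIcon.querySelectorAll('path, circle, rect, polygon, ellipse')
    .forEach(el => el.setAttribute('fill', hexColor));
  group.appendChild(inlineIcon);
}

Called with the values already computed in the loop (for example appendTintedSvgIcon(circleGroup, item.icon, x, y, iconSize, iconTransform)), it would replace the <image> element rather than filtering it.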
50 changes: 42 additions & 8 deletions pages/Wheels/wheel2.json
@@ -5,16 +5,17 @@
"color": "#111111",
"nameSizeFactor": 0.20,
"subnameSizeFactor": 1.65,
"centralCircleSize": 0.3,
"smallCircleSize": 0.175
"centralCircleSize": 0.40,
"smallCircleSize": 0.135,
"icon_transform": "None"
},
"items": [
{
"name": "FineWeb",
"subname": "HuggingFaceFW",
"bullets": ["Cleaned and deduplicated english web data from CommonCrawl", "93.4 TBs"],
"description": "Contains 15T-tokens of cleaned and deduplicated english web data from CommonCrawl. Curated for large-scale LLM training. Models trained on this data show superiority over models trained on other datasets like C4, Dolma, and RedPajama.\nEstimated number of rows: 45,995,362,478.\nSize of auto-converted Parquet files: 93.4 TB. Key feature: As of today, the largest publicly available, high-quality web dataset.",
"icon": "https://drive.google.com/file/d/14NfJZIUWspK80MKoAfLbekNcIkL4Xnjm/view?usp=drive_link",
"icon": "https://huggingface.co/front/assets/huggingface_logo-noborder.svg",
"url": "https://huggingface.co/datasets/HuggingFaceFW/fineweb",
"color": "#FFD21E",
"x": 0.0,
@@ -25,7 +26,7 @@
"subname": "Microsoft",
"bullets": ["Designed to train models for instruction-following tasks", "Prompts and responses are synthetically generated by AgentInstruct"],
"description": "Designed to train models for instruction-following tasks like text creative writing, coding or reading comprehension. Both the prompts and the responses of this dataset are synthetically generated by AgentInstruct, using only raw text content publicly avialble on the Web as seeds.\nNumber of rows: 1,046,410\nSize of auto-converted Parquet files: 2.21 GB",
"icon": "https://drive.google.com/file/d/1m-jM0n5aA57FbmRa7RLuXJhOC-QpDpun/view?usp=drive_link",
"icon": "https://cdn-avatars.huggingface.co/v1/production/uploads/1583646260758-5e64858c87403103f9f1055d.png",
"url": "https://huggingface.co/datasets/microsoft/orca-agentinstruct-1M-v1",
"color": "#5086BC",
"x": 0.0,
@@ -36,7 +37,7 @@
"subname": "Neuralwork",
"bullets": ["Curated for question-answering tasks", "Data is converted to highly readable (.mmd) format"],
"description": "The largest open and permissible licensed text dataset, comprising over 2 trillion tokens (2,003,039,184,047 tokens). Contains a diverse set of sources such as books, newspapers, scientific articles, government and legal documents, code, and more.\nEstimated number of rows: 396,953,971\nSize of auto-converted Parquet files (First 5GB): 2.96 GB\nKey feature: Data is permissively licensed, meaning it can be used, modified, and redistributed without legal ambiguity or risk of infringement.",
"icon": "https://drive.google.com/file/d/10OJ348DMNH3nd8yJBqyP44unwQPL-x9a/view?usp=drive_link",
"icon": "https://cdn-avatars.huggingface.co/v1/production/uploads/6329b0cabdb6242b42b8cd63/7T7rS_-BL7wLWMDiCZgs7.png",
"url": "https://huggingface.co/datasets/neuralwork/arxiver",
"color": "#2FAD3B",
"x": 0.0,
@@ -58,7 +59,7 @@
"subname": "HuggingFaceTB",
"bullets": ["Designed for supervised finetuning (SFT) of LLMs", "Curated to strengthen model capabilities such as mathematics and coding"],
"description": "Synthetic dataset designed for supervised finetuning (SFT) of LLMs. It focuses on bridging the performance gap between models trained on SFT datasets and those trained on proprietary instruction datasets.\nNumber of rows: 2,197,730\nSize of the auto-converted Parquet files: 4.15 GB\nKey feature: While curated for SFT, the dataset also aims at improving on instruction following tasks.",
"icon": "https://drive.google.com/file/d/1QhQuw1-tpEG9T0KzJjsiHQxxQqi9z9PZ/view?usp=drive_link",
"icon": "https://huggingface.co/front/assets/huggingface_logo-noborder.svg",
"url": "https://huggingface.co/datasets/HuggingFaceTB/smoltalk",
"color": "#ffd11e",
"x": 0.0,
@@ -113,7 +114,7 @@
"subname": "HuggingFaceFV",
"bullets": ["Curated to fine-tune AI for video understanding tasks", "Focuses on of mood analysis and storytelling in multimodal settings"],
"description": "Focuses on capturing the emotional journey and narrative flow of videos, thereby giving researchers the ingredients to cook up more context-savvy video analysis models.",
"icon": "https://drive.google.com/file/d/1PlYICoY1Zx0lL8PTcdrj26B9W87vKS0c/view?usp=drive_link",
"icon": "https://huggingface.co/front/assets/huggingface_logo-noborder.svg",
"url": "https://huggingface.co/datasets/HuggingFaceFV/finevideo",
"color": "#ffd11e",
"x": 0.0,
@@ -124,7 +125,7 @@
"subname": "HuggingFaceFW",
"bullets": ["Contains educational web page data filtered from FineWeb", "Uses an educational classifier to retain high-quality data"],
"description": "A 1.3T-token dataset of educational web pages filtered from FineWeb.\nNumber of rows: 3,004,505,493\nSize of the auto-converted Parquet files: 8.84 TB\nKey feature: The dataset uses an AI-trained educational classifier to retain only the most educational web pages.",
"icon": "https://drive.google.com/file/d/1jzAp9yMt7GJlsg5z47fCGelwwdduS_R0/view?usp=drive_link",
"icon": "https://huggingface.co/front/assets/huggingface_logo-noborder.svg",
"url": "https://huggingface.co/datasets/HuggingFaceFW/fineweb-edu",
"color": "#ffd11e",
"x": 0.0,
@@ -140,6 +141,39 @@
"color": "#48AB73",
"x": 0.0,
"y": -0.05
},
{
"name": "reasoning-base-20k",
"subname": "KingNish",
"bullets": ["Designed to train reasoning models", "Number of rows: 19,944"],
"description": "A dataset designed to train reasoning models to think through complex problems with detailed chains of thought and answers, for refined, human-like reasoning.\nNumber of rows: 19,944\nSize of the auto-converted Parquet files: 118 MB\nKey feature: Focus on complex reasoning problems presented in a Chain of Thought (CoT) format. ",
"icon": "https://drive.google.com/file/d/1XFQoBzYrZer31S3icqZdhDVttz9j9yvP/view?usp=drive_link",
"url": "https://huggingface.co/datasets/KingNish/reasoning-base-20k",
"color": "#EA4A7D",
"x": 0.0,
"y": -0.05
},
{
"name": "xlam-function-calling-60k",
"subname": "Salesforce",
"bullets": ["Verified data for function-calling applications", "95% correctness rate "],
"description": "Provides high-quality, verified data for function-calling applications, with a 95% correctness rate ensured through rigorous hierarchical verification.\nKey feature: Data is verified at three hierarchical levels: Format verification, actual function execution and semantic verification, ensuring its reliability and correctness.",
"icon": "https://drive.google.com/file/d/1atbEeWkO0ANarKopnwd6s3Uf1mggkQ3h/view?usp=drive_link",
"url": "https://huggingface.co/datasets/Salesforce/xlam-function-calling-60k",
"color": "#6182C4",
"x": 0.0,
"y": -0.05
},
{
"name": "FRAMES",
"subname": "Google",
"bullets": ["Designed to test capabilities of Retrieval-Augmented Generation", "Questions span diverse topics from history to science"],
"description": "A comprehensive evaluation dataset designed to test the capabilities of Retrieval-Augmented Generation (RAG) systems across factuality, retrieval accuracy, and reasoning.\nNumber of rows: 824\nSize of the auto-converted Parquet files: 236 kB\nKey feature:",
"icon": "https://cdn-avatars.huggingface.co/v1/production/uploads/5dd96eb166059660ed1ee413/WtA3YYitedOr9n02eHfJe.png",
"url": "https://huggingface.co/datasets/google/frames-benchmark",
"color": "#70A1D1",
"x": 0.0,
"y": -0.05
}
]
}
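Usage note, not part of this commit: the new "icon_transform" field pairs with the iconTransform logic added in wheel2.html. Leaving it as "None" (or null) keeps the default white filter on icons, while a hex string enables the approximate colorization. A purely illustrative value would be:

"icon_transform": "#FFD21E"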