From 2e8b2a77cd96e62617f88e98594e26a4e3d2fc7e Mon Sep 17 00:00:00 2001 From: Daniel Date: Wed, 25 Feb 2015 00:45:35 +0200 Subject: [PATCH 1/5] implement domainExtractor for image and title, with a single implementation wikipedia --- fixtures/test_wikipedia1.json | 11 +++++++ src/domainExtractor.coffee | 29 +++++++++++++++++++ src/domain_extractors/en.wikipedia.org.coffee | 23 +++++++++++++++ src/extractor.coffee | 20 +++++++++++-- src/unfluff.coffee | 11 ++++--- test/domainExtractor.coffee | 8 +++++ test/unfluff.coffee | 3 ++ 7 files changed, 98 insertions(+), 7 deletions(-) create mode 100644 fixtures/test_wikipedia1.json create mode 100644 src/domainExtractor.coffee create mode 100644 src/domain_extractors/en.wikipedia.org.coffee create mode 100644 test/domainExtractor.coffee diff --git a/fixtures/test_wikipedia1.json b/fixtures/test_wikipedia1.json new file mode 100644 index 0000000..2c7fa16 --- /dev/null +++ b/fixtures/test_wikipedia1.json @@ -0,0 +1,11 @@ +{ + "url": "http://en.wikipedia.org/wiki/Now_and_Then,_Here_and_There", + "expected": { + "domain": "en.wikipedia.org", + "title": "Now and Then, Here and There", + "cleaned_text": "SAN FRANCISCO (AP) \u2014 Steve Jobs, the mind behind the iPhone", + "meta_favicon": "//bits.wikimedia.org/favicon/wikipedia.ico", + "meta_lang": "en", + "image": "//upload.wikimedia.org/wikipedia/en/thumb/1/10/Now_and_Then_Here_and_There.png/230px-Now_and_Then_Here_and_There.png" + } +} \ No newline at end of file diff --git a/src/domainExtractor.coffee b/src/domainExtractor.coffee new file mode 100644 index 0000000..152b1df --- /dev/null +++ b/src/domainExtractor.coffee @@ -0,0 +1,29 @@ +path = require('path') +fs = require('fs') +_ = require('lodash') +{XRegExp} = require('xregexp') + +cache = {} + +getFilePath = (domain) -> + path.join(__dirname, "domain_extractors", "#{domain}.coffee") + +module.exports = domainExtractors = (url) -> + domain = extractDomain(url) + if cache.hasOwnProperty(domain) + domainExtractor = 
cache[domain] + else + filePath = getFilePath(domain) + if !fs.existsSync(filePath) + #console.log("No domainExtractor file found for '#{domain}'") + filePath = null + cache[domain] = null + else + #console.log("Found domainExtractor file found for '#{domain}'") + domainExtractor = require(filePath) + cache[domain] = domainExtractor + return domainExtractor + +extractDomain = (url) -> + domainRegex = XRegExp('[a-zA-Z]*:*//(?[a-zA-Z0-9\\-\\.]+)/.*') + domain = XRegExp.replace(url, domainRegex, '${domain}') \ No newline at end of file diff --git a/src/domain_extractors/en.wikipedia.org.coffee b/src/domain_extractors/en.wikipedia.org.coffee new file mode 100644 index 0000000..20790c0 --- /dev/null +++ b/src/domain_extractors/en.wikipedia.org.coffee @@ -0,0 +1,23 @@ +_ = require("lodash") + +module.exports = + image: (doc) -> + images = doc(".infobox img") + + if images.length > 0 && images.first().attr('src') + return images.first().attr('src') + + title: (doc) -> + titleElement = doc("title") + titleText = titleElement.text() + + return null unless titleElement + + usedDelimeter = false + _.each ["|", " - ", "»", ":"], (c) -> + if titleText.indexOf(c) >= 0 && !usedDelimeter + titlePieces = titleText.split(c) + titleText = titlePieces[0] + usedDelimeter = true + + titleText.replace(/�/g, "").trim() \ No newline at end of file diff --git a/src/extractor.coffee b/src/extractor.coffee index acd16a5..3e13d10 100644 --- a/src/extractor.coffee +++ b/src/extractor.coffee @@ -1,10 +1,18 @@ _ = require("lodash") stopwords = require("./stopwords") formatter = require("./formatter") +domainsExtractor = require("./domainExtractor") module.exports = # Grab the title of an html doc (excluding junk) - title: (doc) -> + title: (doc,url) -> + if url + domainExtractor = domainsExtractor(url) + if domainExtractor && domainExtractor.title != undefined + title = domainExtractor.title(doc) + return title unless ! 
title + + titleElement = doc("meta[property='og:title']") titleText = titleElement.attr("content") if titleElement @@ -31,8 +39,14 @@ module.exports = "" # Grab an image for the page - image: (doc) -> - images = doc("meta[property='og:image'], meta[itemprop=image], meta[name='twitter:image:src'], meta[name='twitter:image'], meta[name='twitter:image0']") + image: (doc,url) -> + if url + domainExtractor = domainsExtractor(url) + if domainExtractor && domainExtractor.image != undefined + image = domainExtractor.image(doc) + return image unless ! image + + images = doc("meta[property='og:image'], meta[itemprop=image], meta[name='twitter:image:src'], meta[name='twitter:image'], meta[name='twitter:image0'], .infobox img[src]") if images.length > 0 && images.first().attr('content') return images.first().attr('content') diff --git a/src/unfluff.coffee b/src/unfluff.coffee index 9d5a20e..b99d1f4 100644 --- a/src/unfluff.coffee +++ b/src/unfluff.coffee @@ -5,16 +5,17 @@ cleaner = require("./cleaner") module.exports = unfluff = (html, language) -> doc = cheerio.load(html) lng = language || extractor.lang(doc) + url = extractor.canonicalLink(doc) || extractor.favicon(doc) pageData = - title: extractor.title(doc) + title: extractor.title(doc,url) favicon: extractor.favicon(doc) description: extractor.description(doc) keywords: extractor.keywords(doc) lang: lng canonicalLink: extractor.canonicalLink(doc) tags: extractor.tags(doc) - image: extractor.image(doc) + image: extractor.image(doc,url) # Step 1: Clean the doc cleaner(doc) @@ -32,7 +33,8 @@ module.exports = unfluff = (html, language) -> unfluff.lazy = (html, language) -> title: () -> doc = getParsedDoc.call(this, html) - @title_ ?= extractor.title(doc) + url = extractor.canonicalLink(doc) || extractor.favicon(doc) + @title_ ?= extractor.title(doc,url) favicon: () -> doc = getParsedDoc.call(this, html) @@ -60,7 +62,8 @@ unfluff.lazy = (html, language) -> image: () -> doc = getParsedDoc.call(this, html) - @image_ ?= 
extractor.image(doc) + url = extractor.canonicalLink(doc) || extractor.favicon(doc) + @image_ ?= extractor.image(doc,url) videos: () -> return @videos_ if @videos_? diff --git a/test/domainExtractor.coffee b/test/domainExtractor.coffee new file mode 100644 index 0000000..aa90851 --- /dev/null +++ b/test/domainExtractor.coffee @@ -0,0 +1,8 @@ +suite 'DomainExtractor', -> + domainExtractor = require("../src/domainExtractor") + + test 'exists', -> + ok domainExtractor + + test 'en.wikipedia.com', -> + ok domainExtractor('http://en.wikipedia.org/wiki/Thomas_Edison') \ No newline at end of file diff --git a/test/unfluff.coffee b/test/unfluff.coffee index 68a7cb2..a142107 100644 --- a/test/unfluff.coffee +++ b/test/unfluff.coffee @@ -105,6 +105,9 @@ suite 'Unfluff', -> checkFixture('polygon' , ['image']) checkFixture('theverge1' , ['image']) + test 'using domain extractor', -> + checkFixture('wikipedia1' , ['image','title']) + test 'gets cleaned text - Polygon', -> checkFixture('polygon' , ['cleaned_text', 'title', 'link', 'description', 'lang', 'favicon']) From 278a761ad2f05604644ea5c528f876b1366a3511 Mon Sep 17 00:00:00 2001 From: Daniel Date: Wed, 25 Feb 2015 10:32:50 +0200 Subject: [PATCH 2/5] Change matching domain so that wikipedia.org works for en.wikipedia.org and it.wikipedia.org --- src/domainExtractor.coffee | 40 ++++++++++++------- ...ipedia.org.coffee => wikipedia.org.coffee} | 0 test/domainExtractor.coffee | 8 +++- 3 files changed, 33 insertions(+), 15 deletions(-) rename src/domain_extractors/{en.wikipedia.org.coffee => wikipedia.org.coffee} (100%) diff --git a/src/domainExtractor.coffee b/src/domainExtractor.coffee index 152b1df..4d21c6a 100644 --- a/src/domainExtractor.coffee +++ b/src/domainExtractor.coffee @@ -9,21 +9,33 @@ getFilePath = (domain) -> path.join(__dirname, "domain_extractors", "#{domain}.coffee") module.exports = domainExtractors = (url) -> - domain = extractDomain(url) - if cache.hasOwnProperty(domain) - domainExtractor = cache[domain] - 
else - filePath = getFilePath(domain) - if !fs.existsSync(filePath) - #console.log("No domainExtractor file found for '#{domain}'") - filePath = null - cache[domain] = null + domains = extractDomains(url) + domainExtractor = null + _.each domains, (domain) -> + if cache.hasOwnProperty(domain) + domainExtractor = cache[domain] else - #console.log("Found domainExtractor file found for '#{domain}'") - domainExtractor = require(filePath) - cache[domain] = domainExtractor + filePath = getFilePath(domain) + if !fs.existsSync(filePath) + filePath = null + else + domainExtractor = require(filePath) + cache[domain] = domainExtractor return domainExtractor -extractDomain = (url) -> +extractDomains = (url) -> domainRegex = XRegExp('[a-zA-Z]*:*//(?[a-zA-Z0-9\\-\\.]+)/.*') - domain = XRegExp.replace(url, domainRegex, '${domain}') \ No newline at end of file + domains = [] + domain = XRegExp.replace(url, domainRegex, '${domain}') + domains.push domain + splitDomain = domain.split('.') + # The idea of the subdomain is to try to match wikipedia.org from en.wikipedia.org. + # So the minimum parts to domain is 2. 
+ # Still the length of the text should be bigger than 2 characters, to avoid using only the TLD like co.il + _.each splitDomain, (subDomain,index) -> + + if splitDomain.length - index >= 3 || (splitDomain.length - index == 3 && subDomain.length > 2 ) + domain = domain.substr(subDomain.length+1) + domains.push domain + + return domains \ No newline at end of file diff --git a/src/domain_extractors/en.wikipedia.org.coffee b/src/domain_extractors/wikipedia.org.coffee similarity index 100% rename from src/domain_extractors/en.wikipedia.org.coffee rename to src/domain_extractors/wikipedia.org.coffee diff --git a/test/domainExtractor.coffee b/test/domainExtractor.coffee index aa90851..6f4e353 100644 --- a/test/domainExtractor.coffee +++ b/test/domainExtractor.coffee @@ -5,4 +5,10 @@ suite 'DomainExtractor', -> ok domainExtractor test 'en.wikipedia.com', -> - ok domainExtractor('http://en.wikipedia.org/wiki/Thomas_Edison') \ No newline at end of file + ok domainExtractor('http://en.wikipedia.org/wiki/Thomas_Edison') + + test 'he.wikipedia.com', -> + ok domainExtractor('http://he.wikipedia.org/wiki/Thomas_Edison') + + test 'something.he.wikipedia.com', -> + ok domainExtractor('http://he.wikipedia.org/wiki/Thomas_Edison') \ No newline at end of file From 37de0c0be98945f35dc6901fb510b4d4bb21d29a Mon Sep 17 00:00:00 2001 From: Daniel Date: Wed, 25 Feb 2015 11:48:12 +0200 Subject: [PATCH 3/5] update build files --- lib/domainExtractor.js | 47 ++++++++++++++++++++++++++ lib/domain_extractors/wikipedia.org.js | 35 +++++++++++++++++++ lib/extractor.js | 29 ++++++++++++---- lib/unfluff.js | 17 ++++++---- 4 files changed, 115 insertions(+), 13 deletions(-) create mode 100644 lib/domainExtractor.js create mode 100644 lib/domain_extractors/wikipedia.org.js diff --git a/lib/domainExtractor.js b/lib/domainExtractor.js new file mode 100644 index 0000000..1d9a4d7 --- /dev/null +++ b/lib/domainExtractor.js @@ -0,0 +1,47 @@ +// Generated by CoffeeScript 2.0.0-beta7 +void function () { + 
var _, cache, domainExtractors, extractDomains, fs, getFilePath, path, XRegExp; + path = require('path'); + fs = require('fs'); + _ = require('lodash'); + XRegExp = require('xregexp').XRegExp; + cache = {}; + getFilePath = function (domain) { + return path.join(__dirname, 'domain_extractors', '' + domain + '.coffee'); + }; + module.exports = domainExtractors = function (url) { + var domainExtractor, domains; + domains = extractDomains(url); + domainExtractor = null; + _.each(domains, function (domain) { + var filePath; + if (cache.hasOwnProperty(domain)) { + return domainExtractor = cache[domain]; + } else { + filePath = getFilePath(domain); + if (!fs.existsSync(filePath)) { + return filePath = null; + } else { + domainExtractor = require(filePath); + return cache[domain] = domainExtractor; + } + } + }); + return domainExtractor; + }; + extractDomains = function (url) { + var domain, domainRegex, domains, splitDomain; + domainRegex = XRegExp('[a-zA-Z]*:*//(?[a-zA-Z0-9\\-\\.]+)/.*'); + domains = []; + domain = XRegExp.replace(url, domainRegex, '${domain}'); + domains.push(domain); + splitDomain = domain.split('.'); + _.each(splitDomain, function (subDomain, index) { + if (splitDomain.length - index >= 3 || splitDomain.length - index === 3 && subDomain.length > 2) { + domain = domain.substr(subDomain.length + 1); + return domains.push(domain); + } + }); + return domains; + }; +}.call(this); diff --git a/lib/domain_extractors/wikipedia.org.js b/lib/domain_extractors/wikipedia.org.js new file mode 100644 index 0000000..58ba400 --- /dev/null +++ b/lib/domain_extractors/wikipedia.org.js @@ -0,0 +1,35 @@ +// Generated by CoffeeScript 2.0.0-beta7 +void function () { + var _; + _ = require('lodash'); + module.exports = { + image: function (doc) { + var images; + images = doc('.infobox img'); + if (images.length > 0 && images.first().attr('src')) + return images.first().attr('src'); + }, + title: function (doc) { + var titleElement, titleText, usedDelimeter; + titleElement = 
doc('title'); + titleText = titleElement.text(); + if (!titleElement) + return null; + usedDelimeter = false; + _.each([ + '|', + ' - ', + '\xbb', + ':' + ], function (c) { + var titlePieces; + if (titleText.indexOf(c) >= 0 && !usedDelimeter) { + titlePieces = titleText.split(c); + titleText = titlePieces[0]; + return usedDelimeter = true; + } + }); + return titleText.replace(/�/g, '').trim(); + } + }; +}.call(this); diff --git a/lib/extractor.js b/lib/extractor.js index b33370e..ecb6415 100644 --- a/lib/extractor.js +++ b/lib/extractor.js @@ -1,12 +1,21 @@ // Generated by CoffeeScript 2.0.0-beta7 void function () { - var _, addSiblings, biggestTitleChunk, formatter, getObjectTag, getScore, getSiblingsContent, getSiblingsScore, getVideoAttrs, isBoostable, isHighlinkDensity, isNodescoreThresholdMet, isTableAndNoParaExist, postCleanup, stopwords, updateNodeCount, updateScore; + var _, addSiblings, biggestTitleChunk, domainsExtractor, formatter, getObjectTag, getScore, getSiblingsContent, getSiblingsScore, getVideoAttrs, isBoostable, isHighlinkDensity, isNodescoreThresholdMet, isTableAndNoParaExist, postCleanup, stopwords, updateNodeCount, updateScore; _ = require('lodash'); stopwords = require('./stopwords'); formatter = require('./formatter'); + domainsExtractor = require('./domainExtractor'); module.exports = { - title: function (doc) { - var titleElement, titleText, usedDelimeter; + title: function (doc, url) { + var domainExtractor, title, titleElement, titleText, usedDelimeter; + if (url) { + domainExtractor = domainsExtractor(url); + if (domainExtractor && domainExtractor.title !== void 0) { + title = domainExtractor.title(doc); + if (!!title) + return title; + } + } titleElement = doc("meta[property='og:title']"); if (titleElement) titleText = titleElement.attr('content'); @@ -38,9 +47,17 @@ void function () { return ''; } }, - image: function (doc) { - var images; - images = doc("meta[property='og:image'], meta[itemprop=image], meta[name='twitter:image:src'], 
meta[name='twitter:image'], meta[name='twitter:image0']"); + image: function (doc, url) { + var domainExtractor, image, images; + if (url) { + domainExtractor = domainsExtractor(url); + if (domainExtractor && domainExtractor.image !== void 0) { + image = domainExtractor.image(doc); + if (!!image) + return image; + } + } + images = doc("meta[property='og:image'], meta[itemprop=image], meta[name='twitter:image:src'], meta[name='twitter:image'], meta[name='twitter:image0'], .infobox img[src]"); if (images.length > 0 && images.first().attr('content')) return images.first().attr('content'); return null; diff --git a/lib/unfluff.js b/lib/unfluff.js index a724ad5..8c8a969 100644 --- a/lib/unfluff.js +++ b/lib/unfluff.js @@ -5,18 +5,19 @@ void function () { extractor = require('./extractor'); cleaner = require('./cleaner'); module.exports = unfluff = function (html, language) { - var doc, lng, pageData, topNode; + var doc, lng, pageData, topNode, url; doc = cheerio.load(html); lng = language || extractor.lang(doc); + url = extractor.canonicalLink(doc) || extractor.favicon(doc); pageData = { - title: extractor.title(doc), + title: extractor.title(doc, url), favicon: extractor.favicon(doc), description: extractor.description(doc), keywords: extractor.keywords(doc), lang: lng, canonicalLink: extractor.canonicalLink(doc), tags: extractor.tags(doc), - image: extractor.image(doc) + image: extractor.image(doc, url) }; cleaner(doc); topNode = extractor.calculateBestNode(doc, lng); @@ -27,9 +28,10 @@ void function () { unfluff.lazy = function (html, language) { return { title: function () { - var doc; + var doc, url; doc = getParsedDoc.call(this, html); - return null != this.title_ ? this.title_ : this.title_ = extractor.title(doc); + url = extractor.canonicalLink(doc) || extractor.favicon(doc); + return null != this.title_ ? this.title_ : this.title_ = extractor.title(doc, url); }, favicon: function () { var doc; @@ -62,9 +64,10 @@ void function () { return null != this.tags_ ? 
this.tags_ : this.tags_ = extractor.tags(doc); }, image: function () { - var doc; + var doc, url; doc = getParsedDoc.call(this, html); - return null != this.image_ ? this.image_ : this.image_ = extractor.image(doc); + url = extractor.canonicalLink(doc) || extractor.favicon(doc); + return null != this.image_ ? this.image_ : this.image_ = extractor.image(doc, url); }, videos: function () { var doc, topNode; From 3c74e4b2a1de79df93ae02778a5236d5a21e1b52 Mon Sep 17 00:00:00 2001 From: Daniel Date: Wed, 25 Feb 2015 12:40:42 +0200 Subject: [PATCH 4/5] detect domain extractors file extension based on running file (coffee/js) --- lib/domainExtractor.js | 5 +++-- src/domainExtractor.coffee | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/domainExtractor.js b/lib/domainExtractor.js index 1d9a4d7..28df940 100644 --- a/lib/domainExtractor.js +++ b/lib/domainExtractor.js @@ -1,13 +1,14 @@ // Generated by CoffeeScript 2.0.0-beta7 void function () { - var _, cache, domainExtractors, extractDomains, fs, getFilePath, path, XRegExp; + var _, cache, domainExtractors, extension, extractDomains, fs, getFilePath, path, XRegExp; path = require('path'); fs = require('fs'); _ = require('lodash'); XRegExp = require('xregexp').XRegExp; cache = {}; + extension = __filename.substr(__filename.lastIndexOf('.') + 1); getFilePath = function (domain) { - return path.join(__dirname, 'domain_extractors', '' + domain + '.coffee'); + return path.join(__dirname, 'domain_extractors', '' + domain + '.' 
+ extension); }; module.exports = domainExtractors = function (url) { var domainExtractor, domains; diff --git a/src/domainExtractor.coffee b/src/domainExtractor.coffee index 4d21c6a..2c388fb 100644 --- a/src/domainExtractor.coffee +++ b/src/domainExtractor.coffee @@ -4,9 +4,10 @@ _ = require('lodash') {XRegExp} = require('xregexp') cache = {} +extension = __filename.substr(__filename.lastIndexOf(".")+1) getFilePath = (domain) -> - path.join(__dirname, "domain_extractors", "#{domain}.coffee") + path.join(__dirname, "domain_extractors", "#{domain}.#{extension}") module.exports = domainExtractors = (url) -> domains = extractDomains(url) From 3283ae4e3b0bbda64e9ff5b721519199ac6dc300 Mon Sep 17 00:00:00 2001 From: danielgranat Date: Mon, 20 Feb 2017 22:02:28 +0200 Subject: [PATCH 5/5] Add hebrew stopwords --- data/stopwords/stopwords-he.txt | 499 ++++++++++++++++++++++++++++++++ 1 file changed, 499 insertions(+) create mode 100644 data/stopwords/stopwords-he.txt diff --git a/data/stopwords/stopwords-he.txt b/data/stopwords/stopwords-he.txt new file mode 100644 index 0000000..daf49e9 --- /dev/null +++ b/data/stopwords/stopwords-he.txt @@ -0,0 +1,499 @@ +את +לא +של +אני +על +זה +עם +כל +הוא +אם +או +גם +יותר +יש +לי +מה +אבל +פורום +אז +טוב +רק +כי +שלי +היה +בפורום +אין +עוד +היא +אחד +ב +ל +עד +לך +כמו +להיות +אתה +כמה +אנחנו +הם +כבר +אנשים +אפשר +תודה +שלא +אותו +ה +מאוד +הרבה +ולא +ממש +לו +א +מי +חיים +בית +שאני +יכול +שהוא +כך +הזה +איך +היום +קצת +עכשיו +שם +בכל +יהיה +תמיד +י +שלך +הכי +ש +בו +לעשות +צריך +כן +פעם +לכם +ואני +משהו +אל +שלו +שיש +ו +וגם +אתכם +אחרי +בנושא +כדי +פשוט +לפני +שזה +אותי +אנו +למה +דבר +כ +כאן +אולי +טובים +רוצה +שנה +בעלי +החיים +למען +אתם +מ +בין +יום +זאת +איזה +ביותר +לה +אחת +הכל +הפורומים +לכל +אלא +פה +יודע +שלום +דקות +לנו +השנה +דרך +אדם +נראה +זו +היחידה +רוצים +בכלל +טובה +שלנו +האם +הייתי +הלב +היו +ח +שדרות +בלי +להם +שאתה +אותה +מקום +ואתם +חלק +בן +בואו +אחר +האחת +אותך +כמובן +בגלל +באמת +מישהו +ילדים +אותם +הפורום 
+טיפוח +וזה +ר +שהם +אך +מזמין +ישראל +כוס +זמן +ועוד +הילדים +עדיין +כזה +עושה +שום +לקחת +העולם +תפוז +לראות +לפורום +וכל +לקבל +נכון +יוצא +לעולם +גדול +אפילו +ניתן +שני +אוכל +קשה +משחק +ביום +ככה +אמא +בת +השבוע +נוספים +לגבי +בבית +אחרת +לפי +ללא +שנים +הזמן +שמן +מעט +לפחות +אף +שוב +שלהם +במקום +כולם +נועית +הבא +מעל +לב +המון +לדבר +ע +אוהב +מוסיפים +חצי +בעיקר +כפות +לפעמים +שהיא +הנהלת +ועל +ק +אוהבים +ת +יודעת +ד +גרוע +שאנחנו +מים +לילדים +בארץ +מודיע +אשמח +שלכם +פחות +לכולם +די +אהבה +יכולה +דברים +הקהילה +לעזור +פרטים +בדיוק +מלח +קל +הראשי +שלה +להוסיף +השני +לדעתי +בר +למרות +שגם +מוזמנים +לאחר +במה +חושב +מאד +יפה +להגשים +חדש +קטן +מחפשים +שמח +מדברים +ואם +במיוחד +עבודה +מדי +ואז +חשוב +שאם +אוהבת +פעמים +מנהלת +אומר +מול +קשר +מנהל +שיהיה +שאין +שאנו +האהבה +ס +הצטרפו +כפית +בשביל +החגים +אופן +לתת +כף +בתוך +סוכר +גיל +בהצלחה +והוא +מקווה +סתם +ויש +נגד +כמעט +שאת +עולה +אי +מספר +ראשון +לדרך +נהיה +לעצב +עושים +ולנהל +היתה +עליו +מזה +הייתה +בא +בפרס +חלות +ראש +מזמינים +טיפים +מכבי +רבה +הורים +‡ +מקרה +קרן +המוצלח +להגיע +גדולה +כנראה +החמשיר +הראשון +פלפל +המשחק +וכאן +לדעת +ואת +גרועים +ספר +אגב +אחרים +להגיד +בתפוז +והעולם +אופנה +דווקא +מספיק +שעות +תמונות +כשאנחנו +שוקולד +ולכן +ג +לקרוא +לניהול +שבוע +ויופי +חלום +בה +שהיה +שאלה +מקומה +הזו +בפורומים +החדש +מתאמצים +שחקן +שמזינים +נשמת +בערך +מכל +ומה +רגל +כסף +רואה +קטנה +בצל +בעולם +אינטרנט +חוץ +ברור +הולך +חושבת +לזה +כלום +הן +כאלה +בטוח +הדבר +תהיה +מגיע +סוף +האמת +ממנו +מיכל +החדשה +לתרום +האנשים +ועד +בדרך +אצלי +ההורים +בני +מתוך +כאשר +לבד +ראיתי +מצב +מלא +לבחור +נשמח +החג +רע +עוף +מן +להביא +מצאתי +כתובות +מעניין +צריכה +להכנס +לחלוטין +שעה +מתכון +קודם +תשובות +מדובר +ניהול +מזל +כדאי +יהיו +ההודעות +בוקר +נילוות +איפה +בעיה +קמח +ללכת +פורומים +אמר +נושא +ההכנה +בבקשה +שכל +הזאת +למשחק +פנינה +תחרות +חבר +לקנות +מהם +רגע +גרם +אלו +עצמו +מראש +הכלב +כולנו +עדיף +איתו +למשל +לבשל +למי +רעיונות +הבלוג +רוב +אביב +כרגע +בסוף +אלה +לחג +ערוץ +שווה +באופן +מאמין +לבן +בזה 
+הכבוד +לראש +ם +ימי +שחור +בצורה +בעמוד +ועם +וחצי +האלה +תמונה +בשלב +משחקים +נו