diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000..77bc094 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1 @@ +buymeacoffee: fernandowip \ No newline at end of file diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 618bf7e..888f78b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,10 +1,11 @@ name: Test SortGS - on: push: branches: [ master, dev ] pull_request: branches: [ master, dev ] + schedule: + - cron: '0 0 1 * *' # Run at 00:00 on the first of every month jobs: build: @@ -13,7 +14,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.7, 3.8, 3.9, '3.10', '3.11'] + python-version: ['3.10', '3.11'] steps: - uses: actions/checkout@v2 @@ -21,10 +22,6 @@ jobs: uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - name: Install the package run: pip install -e . - name: Test with unittest diff --git a/README.md b/README.md index 2f2841f..37aa4e4 100644 --- a/README.md +++ b/README.md @@ -3,12 +3,20 @@ sortgs is a Python tool for ranking Google Scholar publications by the number of citations. It is useful for finding relevant papers in a specific field. The data acquired from Google Scholar includes Title, Citations, Links, Rank, and a new column with the number of citations per year. In the background, it first try to fetch results using python requests. If it fails, it will use selenium to fetch the results. -## Try on Google Colab: [](https://colab.research.google.com/github/WittmannF/sort-google-scholar/blob/master/examples/run_sortgs_on_colab.ipynb) - - No install requirements! Limitations: Can't handle robot checking, so use it carefully. 
+## 🚀 Run it on Google Colab +- **No-Code Version (new!)**: [Open in Colab](https://colab.research.google.com/github/WittmannF/sort-google-scholar/blob/master/examples/Sort_Google_Scholar_No_Code_Version.ipynb) — *No coding required! Perfect for a quick start!* ⚡ +- **Code Version:** [Open in Colab](https://colab.research.google.com/github/WittmannF/sort-google-scholar/blob/master/examples/run_sortgs_on_colab.ipynb) — *For developers who want full control of what's behind the scenes!* 💻 + +> 💡 **All you need** is a Google Account to get started. +> ⚠️ **Note**: Google Scholar may block access after too many repetitive requests due to CAPTCHA checks, so proceed mindfully! + +## 📚 Colab No-Code Instructions +https://github.com/user-attachments/assets/25de7bad-2a5d-4bcf-b486-faa1d7a29eb3 + ## Installation -You can now install `sortgs` directly using `pip`: +You can install `sortgs` directly using `pip`: ```bash pip install sortgs @@ -166,3 +174,9 @@ And check if all tests passes. Alternativelly send a PR, github actions will run ## LICENSE - MIT + +## Support My Work +If you find this project useful, consider supporting me: + +[![Buy Me a Coffee](https://img.shields.io/badge/-Buy%20Me%20a%20Coffee-ffdd00?style=flat&logo=buy-me-a-coffee&logoColor=black)](https://buymeacoffee.com/fernandowip) + diff --git a/setup.py b/setup.py index dda5845..a503ec3 100644 --- a/setup.py +++ b/setup.py @@ -17,6 +17,7 @@ 'matplotlib', 'selenium', ], + entry_points={ 'console_scripts': [ 'sortgs=sortgs:main', # This line sets up the command line tool diff --git a/test/test_sortgs.py b/test/test_sortgs.py index ccf117a..1146093 100644 --- a/test/test_sortgs.py +++ b/test/test_sortgs.py @@ -1,91 +1,55 @@ import unittest -from unittest.mock import patch -import sortgs import os import pandas as pd class TestSortGS(unittest.TestCase): @classmethod - def setUpClass(self): - '''run once before all tests''' - os.system("python sortgs.py --debug --kw 'machine learning' --nresults 10 --endyear 2022") - 
self.df_top_10=pd.read_csv('machine_learning.csv') - - os.system("python sortgs.py --debug --kw 'machine learning' --nresults 20 --endyear 2022") - self.df_top_20=pd.read_csv('machine_learning.csv') + def setUpClass(cls): + '''Run once before all tests''' + os.system("sortgs 'machine learning' --debug --nresults 10 --endyear 2022") + cls.df_top_10_cli = pd.read_csv('machine_learning.csv') - os.system("python sortgs.py --debug --kw 'machine learning' --nresults 20 --endyear 2022 --sortby 'cit/year'") - self.df_top_sorted_cit_per_year=pd.read_csv('machine_learning.csv') + os.system("sortgs 'machine learning' --debug --nresults 10 --endyear 2022 --sortby 'cit/year'") + cls.df_top_sorted_cit_per_year_cli = pd.read_csv('machine_learning.csv') - # Repeat the above, but testing the cli command - os.system("sortgs 'machine learning' --debug --nresults 10 --endyear 2022") - self.df_top_10_cli=pd.read_csv('machine_learning.csv') + def test_get_10_results_cli(self): + self.assertEqual(len(self.df_top_10_cli), 10) - os.system("sortgs 'machine learning' --debug --nresults 20 --endyear 2022") - self.df_top_20_cli=pd.read_csv('machine_learning.csv') + def test_is_sorted_by_citations(self): + df = self.df_top_10_cli + top_citations = list(df.Citations.values[:5]) + self.assertEqual(top_citations, [3166, 2853, 2416, 948, 830]) - os.system("sortgs 'machine learning' --debug --nresults 20 --endyear 2022 --sortby 'cit/year'") - self.df_top_sorted_cit_per_year_cli=pd.read_csv('machine_learning.csv') - - def test_get_10_results(self): - self.assertEqual(len(self.df_top_10), 10) - - def test_get_20_results(self): - self.assertEqual(len(self.df_top_20), 20) - - def test_is_sorted(self): - df=self.df_top_20 - top_citations=list(df.Citations.values[:5]) - self.assertEqual(top_citations, [49230, 8603, 3166, 3069, 2853]) - - def test_top_result(self): - df=self.df_top_20 - top_author = str(df.Author.values[0]) + def test_top_result_cli(self): + df = self.df_top_10_cli + top_author = 
str(df.Author.values[0]).strip() top_citation = int(df.Citations.values[0]) top_cit_per_year = int(df['cit/year'].values[0]) top_results = [top_author, top_citation, top_cit_per_year] - self.assertEqual(top_results, [' Bishop', 49230, 2896]) + self.assertEqual(top_results, ['Shale', 3166, 352]) def test_cit_per_year_sorted(self): - df=self.df_top_sorted_cit_per_year - top_citations=list(df.Citations.values[:5]) + df = self.df_top_sorted_cit_per_year_cli top_cit_per_year = list(df['cit/year'].values[:5]) - top_results = [top_citations, top_cit_per_year] - self.assertEqual(top_results, [[49230, 8603, 2853, 3166, 2416], - [2896, 782, 571, 352, 302]]) + self.assertEqual(top_cit_per_year, [571, 352, 302, 85, 79]) def test_csv_exists(self): - os.system("python sortgs.py --debug --kw 'machine learning' --nresults 10") self.assertTrue(os.path.exists('machine_learning.csv')) - - def test_cli_get_10_results(self): - self.assertEqual(len(self.df_top_10_cli), 10) - - def test_cli_get_20_results(self): - self.assertEqual(len(self.df_top_20_cli), 20) - - def test_cli_is_sorted(self): - df=self.df_top_20_cli - top_citations=list(df.Citations.values[:5]) - self.assertEqual(top_citations, [49230, 8603, 3166, 3069, 2853]) - - def test_cli_top_result(self): - df=self.df_top_20_cli - top_author = str(df.Author.values[0]) - top_citation = int(df.Citations.values[0]) - top_cit_per_year = int(df['cit/year'].values[0]) - top_results = [top_author, top_citation, top_cit_per_year] - self.assertEqual(top_results, [' Bishop', 49230, 2896]) def test_cli_cit_per_year_sorted(self): - df=self.df_top_sorted_cit_per_year_cli - top_citations=list(df.Citations.values[:5]) + df = self.df_top_sorted_cit_per_year_cli + top_citations = list(df.Citations.values[:5]) top_cit_per_year = list(df['cit/year'].values[:5]) - top_results = [top_citations, top_cit_per_year] - self.assertEqual(top_results, [[49230, 8603, 2853, 3166, 2416], - [2896, 782, 571, 352, 302]]) - + + # Convert np.int64 values to Python int 
+ top_citations = [int(c) for c in top_citations] + top_cit_per_year = [int(c) for c in top_cit_per_year] + top_results = [top_citations, top_cit_per_year] + self.assertEqual(top_results, [ + [2853, 3166, 2416, 598, 948], + [571, 352, 302, 85, 79] + ]) if __name__=='__main__': unittest.main() \ No newline at end of file