Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: System Config #3

Merged
merged 4 commits into from
Jun 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions packages/frp-cli/frp_cli/main.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,26 @@
from argparse import ArgumentParser, BooleanOptionalAction
from pathlib import Path
from frp import FRPScholarlyAnalysis, Matcher
import toml


def scholarly_analysis(input_csv: Path, frp_title: str, frp_year: int, save_results: bool, save_output: Path) -> None:
def scholarly_analysis(input_csv: Path, config_path: Path, frp_title: str, frp_year: int, save_results: bool, save_output: Path) -> None:
if not input_csv.exists():
print('File {} does not exist'.format(input_csv))
exit(1)

if not config_path.exists():
print('File {} does not exist'.format(config_path))
exit(1)

with open(config_path, 'r') as config_file:
config = toml.load(config_file)

# Make the matcher
matcher = Matcher()
matcher = Matcher(config['scholarly']['matcher'])

# Run the analysis
analyzer = FRPScholarlyAnalysis(matcher)
analyzer = FRPScholarlyAnalysis(matcher, config['scholarly'])

# Collect the results
results = analyzer.run_frp_analysis(input_csv, frp_title, frp_year)
Expand All @@ -35,6 +43,9 @@ def main():
scholarly_parser.add_argument('--input',
required=True,
help='Input CSV')
scholarly_parser.add_argument('--config',
required=True,
help='Location of the FRP config file')
scholarly_parser.add_argument('--frp-title',
required=False,
default='Leveraging AI to Examine Disparities and Bias in Health Care',
Expand All @@ -58,7 +69,7 @@ def main():

# Determine the correct command to run
if args.command == 'scholarly':
scholarly_analysis(Path(args.input), args.frp_title, args.frp_year, args.save_output, Path(args.output_csv))
scholarly_analysis(Path(args.input), Path(args.config), args.frp_title, args.frp_year, args.save_output, Path(args.output_csv))
return
else:
print('Command {} not recognized'.format(args.command))
Expand Down
24 changes: 23 additions & 1 deletion packages/frp-cli/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions packages/frp-cli/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@ main = "frp_cli:main"

[tool.poetry.dependencies]
python = "^3.9"
toml = "^0.10.2"


[tool.poetry.group.dev.dependencies]
flake8 = "^7.1.0"
black = "^24.4.2"
mypy = "^1.10.0"
types-toml = "^0.10.8.20240310"

[build-system]
requires = ["poetry-core"]
Expand Down
39 changes: 14 additions & 25 deletions packages/frp/frp/matcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,39 +27,28 @@ def _type(self) -> str:


class Matcher:
def __init__(self):
def __init__(self, config: dict):
# Pull out the needed configs
system_prompt = config['system_prompt']
human_prompt = config['human_prompt']
model_name = config['model_name']
model_base_url = config['model_base_url']

# Build up the LangChain chain for handling the matching
prompt_template = self._get_prompt_template()
model = self._get_model()
prompt_template = self._get_prompt_template(system_prompt, human_prompt)
model = self._get_model(model_base_url, model_name)
output_parser = self._get_output_parser()

self._chain = prompt_template | model | output_parser

def _get_prompt_template(self) -> runnables.Runnable:
def _get_prompt_template(self, system_prompt: str, human_prompt: str) -> runnables.Runnable:
return ChatPromptTemplate.from_messages([
('system',
'''
You are an assistant tasked with classifying whether the given publication title
is associated with the given research topic.

Specifically, the content should be marked as relevant if it involves:
1. Publications which are likely to have been written based on the research topic as a prompt.
2. If the publication title has overlap with the research topic.

Generate a short response indicating whether the content meets any of the above criteria. Respond
with "Yes" for relevance or "No" if the publication does not have high overlap.
'''),
('human', '''
Assess the given headline and article body based on the specified criteria. Provide a concise response indicating relevance.

Publication Title: {publication_title}

Research Topic: {frp_title}
''')
('system', system_prompt),
('human', human_prompt)
])

def _get_model(self) -> runnables.Runnable:
return Ollama(base_url='https://ollama-sail-24887a.apps.shift.nerc.mghpcc.org', model='llama2:13b')
def _get_model(self, base_url: str, model_name: str) -> runnables.Runnable:
return Ollama(base_url=base_url, model=model_name)

def _get_output_parser(self) -> runnables.Runnable:
return BooleanOutputParser()
Expand Down
17 changes: 15 additions & 2 deletions packages/frp/frp/scholarly.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,16 @@ class FRPScholarlyAnalysis:
"""
Handles the cleaning and matching of MyCV CSVs to FRPs
"""
def __init__(self, matcher: Matcher):
def __init__(self, matcher: Matcher, config: dict):
self._matcher = matcher

# Get the mapping between matcher inputs and DataFrame columns.
# Each key is the name of a parameter passed to the matcher; each
# value is the name of the DataFrame column that supplies it.
# e.g. publication_title: 'Title OR Chapter title'
self._mappings = config['matcher']['mappings']

def _load(self, csv_location: Path) -> pd.DataFrame:
"""
Read in the dataframe from the CSV. Does not additional
Expand Down Expand Up @@ -77,10 +84,16 @@ def _match(self, df: pd.DataFrame, frp_title: str) -> pd.DataFrame:
"""
# Function which is applied to every row in the dataframe
def apply_matcher(row: pd.Series) -> pd.Series:
# First get all shared mappings
mapping = {
'publication_title': row['Title OR Chapter title'],
'frp_title': frp_title
}

# Then, add in the values from the row as defined in the
# config
for key, value in self._mappings.items():
mapping[key] = row[value]

return pd.Series(self._matcher.match(mapping))

# Make a copy of the data and apply the matching row-by-row
Expand Down
Loading