Skip to content

Commit

Permalink
feat: System Config (#3)
Browse files Browse the repository at this point in the history
* Toml based configuration support
* System and human prompt templates from config
* Ollama settings from config
* Mapping between dataframe -> LLM prompt from config
  • Loading branch information
cbolles authored Jun 18, 2024
1 parent 331c296 commit 10a2c1c
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 32 deletions.
19 changes: 15 additions & 4 deletions packages/frp-cli/frp_cli/main.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,26 @@
from argparse import ArgumentParser, BooleanOptionalAction
from pathlib import Path
from frp import FRPScholarlyAnalysis, Matcher
import toml


def scholarly_analysis(input_csv: Path, frp_title: str, frp_year: int, save_results: bool, save_output: Path) -> None:
def scholarly_analysis(input_csv: Path, config_path: Path, frp_title: str, frp_year: int, save_results: bool, save_output: Path) -> None:
if not input_csv.exists():
print('File {} does not exist'.format(input_csv))
exit(1)

if not config_path.exists():
print('File {} does not exist'.format(config_path))
exit(1)

with open(config_path, 'r') as config_file:
config = toml.load(config_file)

# Make the matcher
matcher = Matcher()
matcher = Matcher(config['scholarly']['matcher'])

# Run the analysis
analyzer = FRPScholarlyAnalysis(matcher)
analyzer = FRPScholarlyAnalysis(matcher, config['scholarly'])

# Collect the results
results = analyzer.run_frp_analysis(input_csv, frp_title, frp_year)
Expand All @@ -35,6 +43,9 @@ def main():
scholarly_parser.add_argument('--input',
required=True,
help='Input CSV')
scholarly_parser.add_argument('--config',
required=True,
help='Location of the FRP config file')
scholarly_parser.add_argument('--frp-title',
required=False,
default='Leveraging AI to Examine Disparities and Bias in Health Care',
Expand All @@ -58,7 +69,7 @@ def main():

# Determine the correct command to run
if args.command == 'scholarly':
scholarly_analysis(Path(args.input), args.frp_title, args.frp_year, args.save_output, Path(args.output_csv))
scholarly_analysis(Path(args.input), Path(args.config), args.frp_title, args.frp_year, args.save_output, Path(args.output_csv))
return
else:
print('Command {} not recognized'.format(args.command))
Expand Down
24 changes: 23 additions & 1 deletion packages/frp-cli/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions packages/frp-cli/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@ main = "frp_cli:main"

[tool.poetry.dependencies]
python = "^3.9"
toml = "^0.10.2"


[tool.poetry.group.dev.dependencies]
flake8 = "^7.1.0"
black = "^24.4.2"
mypy = "^1.10.0"
types-toml = "^0.10.8.20240310"

[build-system]
requires = ["poetry-core"]
Expand Down
39 changes: 14 additions & 25 deletions packages/frp/frp/matcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,39 +27,28 @@ def _type(self) -> str:


class Matcher:
def __init__(self):
def __init__(self, config: dict):
# Pull out the needed configs
system_prompt = config['system_prompt']
human_prompt = config['human_prompt']
model_name = config['model_name']
model_base_url = config['model_base_url']

# Build up the LangChain chain for handling the matching
prompt_template = self._get_prompt_template()
model = self._get_model()
prompt_template = self._get_prompt_template(system_prompt, human_prompt)
model = self._get_model(model_base_url, model_name)
output_parser = self._get_output_parser()

self._chain = prompt_template | model | output_parser

def _get_prompt_template(self) -> runnables.Runnable:
def _get_prompt_template(self, system_prompt: str, human_prompt: str) -> runnables.Runnable:
return ChatPromptTemplate.from_messages([
('system',
'''
You are an assistant tasked with classifying whether the given publication title
is associated with the given research topic.
Specifically, the content should be marked as relevant if it involves:
1. Publications which are likely to have been written based on the research topic as a prompt.
2. If the publication title has overlap with the research topic.
Generate a short response indicating whether the content meets any of the above criteria. Respond
with "Yes" for relevance or "No" if the publication does not have high overlap.
'''),
('human', '''
Assess the given headline and article body based on the specified criteria. Provide a concise response indicating relevance.
Publication Title: {publication_title}
Research Topic: {frp_title}
''')
('system', system_prompt),
('human', human_prompt)
])

def _get_model(self) -> runnables.Runnable:
return Ollama(base_url='https://ollama-sail-24887a.apps.shift.nerc.mghpcc.org', model='llama2:13b')
def _get_model(self, base_url: str, model_name: str) -> runnables.Runnable:
return Ollama(base_url=base_url, model=model_name)

def _get_output_parser(self) -> runnables.Runnable:
return BooleanOutputParser()
Expand Down
17 changes: 15 additions & 2 deletions packages/frp/frp/scholarly.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,16 @@ class FRPScholarlyAnalysis:
"""
Handles the cleaning and matching of MyCV CSVs to FRPs
"""
def __init__(self, matcher: Matcher):
def __init__(self, matcher: Matcher, config: dict):
self._matcher = matcher

# Get the mapping between matcher inputs and DataFrame columns.
# Each key is the name of a parameter passed to the matcher and
# each value is the name of the DataFrame column that supplies it.
# ex) publication_title: 'Title OR Chapter title'
self._mappings = config['matcher']['mappings']

def _load(self, csv_location: Path) -> pd.DataFrame:
"""
Read in the dataframe from the CSV. Does not do additional
Expand Down Expand Up @@ -77,10 +84,16 @@ def _match(self, df: pd.DataFrame, frp_title: str) -> pd.DataFrame:
"""
# Function which is applied to every row in the dataframe
def apply_matcher(row: pd.Series) -> pd.Series:
# First get all shared mappings
mapping = {
'publication_title': row['Title OR Chapter title'],
'frp_title': frp_title
}

# Then, add in the values from the row as defined in the
# config
for key, value in self._mappings.items():
mapping[key] = row[value]

return pd.Series(self._matcher.match(mapping))

# Make a copy of the data and apply the matching row-by-row
Expand Down

0 comments on commit 10a2c1c

Please sign in to comment.