Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Reference is shown multiple times for every sentence in response #56

Merged
merged 4 commits into from
Dec 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 6 additions & 12 deletions code/backend/batch/utilities/parser/output_parser_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,11 @@ def _get_source_docs_from_answer(self, answer):
results = re.findall(r"\[doc(\d+)\]", answer)
return [int(i) for i in results]

def _replace_last(self, text, old, new):
"""Replaces the last occurrence of a substring in a string

This is done by reversing the string using [::-1], replacing the first occurrence of the reversed substring, and
reversing the string again.
"""
return (text[::-1].replace(old[::-1], new[::-1], 1))[::-1]

def _make_doc_references_sequential(self, answer, doc_ids):
for i, idx in enumerate(doc_ids):
answer = self._replace_last(answer, f"[doc{idx}]", f"[doc{i+1}]")
def _make_doc_references_sequential(self, answer):
doc_matches = list(re.finditer(r"\[doc\d+\]", answer))
for i, match in enumerate(doc_matches):
start, end = match.span()
answer = answer[:start] + f"[doc{i + 1}]" + answer[end:]
return answer

def parse(
Expand All @@ -42,7 +36,7 @@ def parse(
) -> List[dict]:
answer = self._clean_up_answer(answer)
doc_ids = self._get_source_docs_from_answer(answer)
answer = self._make_doc_references_sequential(answer, doc_ids)
answer = self._make_doc_references_sequential(answer)

# create return message object
messages = [
Expand Down
5 changes: 2 additions & 3 deletions code/frontend/src/components/Answer/AnswerParser.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ let filteredCitations = [] as Citation[];

// Define a function to check if a citation with the same chunk_id and id already exists in filteredCitations
const isDuplicate = (citation: Citation,citationIndex:string) => {
return filteredCitations.some((c) => c.chunk_id === citation.chunk_id) ;
return filteredCitations.some((c) => c.chunk_id === citation.chunk_id && c.id === citation.id) ;
};

export function parseAnswer(answer: AskResponse): ParsedAnswer {
Expand All @@ -28,12 +28,11 @@ export function parseAnswer(answer: AskResponse): ParsedAnswer {
let citation = cloneDeep(answer.citations[Number(citationIndex) - 1]) as Citation;
if (!isDuplicate(citation, citationIndex) && citation !== undefined) {
answerText = answerText.replaceAll(link, ` ^${++citationReindex}^ `);
citation.id = citationIndex; // original doc index to de-dupe
citation.reindex_id = citationReindex.toString(); // reindex from 1 for display
filteredCitations.push(citation);
}else{
// Replacing duplicate citation with original index
let matchingCitation = filteredCitations.find((ct) => citation.chunk_id == ct.chunk_id);
let matchingCitation = filteredCitations.find((ct) => citation.chunk_id === ct.chunk_id && citation.id === ct.id);
if (matchingCitation) {
answerText= answerText.replaceAll(link, ` ^${matchingCitation.reindex_id}^ `)
}
Expand Down
78 changes: 44 additions & 34 deletions infra/main.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ param azureOpenAIVisionModelVersion string = 'vision-preview'
@description('Azure OpenAI Vision Model Capacity - See here for more info https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/quota')
param azureOpenAIVisionModelCapacity int = 10

@description('Orchestration strategy: openai_function or semantic_kernel or langchain str. If you use a old version of turbo (0301), please select langchain')
@description('Orchestration strategy: openai_function or semantic_kernel or langchain str. If you use an old version of turbo (0301), please select langchain. If the database type is PostgreSQL, set this to semantic_kernel.')
@allowed([
'openai_function'
'semantic_kernel'
Expand All @@ -150,7 +150,7 @@ param azureOpenAIVisionModelCapacity int = 10
])
param orchestrationStrategy string = 'semantic_kernel'

@description('Chat conversation type: custom or byod.')
@description('Chat conversation type: custom or byod. If the database type is PostgreSQL, set this to custom.')
@allowed([
'custom'
'byod'
Expand Down Expand Up @@ -321,7 +321,7 @@ var eventGridSystemTopicName = 'doc-processing'
var tags = { 'azd-env-name': environmentName }
var rgName = 'rg-${environmentName}'
var keyVaultName = 'kv-${resourceToken}'
var baseUrl = 'https://raw.githubusercontent.com/Fr4nc3/chat-with-your-data-solution-accelerator/main/'
var baseUrl = 'https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/main/'
var azureOpenAIModelInfo = string({
model: azureOpenAIModel
modelName: azureOpenAIModelName
Expand All @@ -333,8 +333,8 @@ var azureOpenAIEmbeddingModelInfo = string({
modelVersion: azureOpenAIEmbeddingModelVersion
})

var appversion = 'devpostgre' // Update GIT deployment branch
var registryName = 'cwydcontainerregpk' // Update Registry name
var appversion = 'latest' // Update GIT deployment branch
var registryName = 'fruoccopublic' // Update Registry name

// Organize resources in a resource group
resource rg 'Microsoft.Resources/resourceGroups@2021-04-01' = {
Expand Down Expand Up @@ -383,7 +383,9 @@ module keyvault './core/security/keyvault.bicep' = if (useKeyVault || authType =
location: location
tags: tags
principalId: principalId
managedIdentityObjectId: databaseType == 'PostgreSQL' ? managedIdentityModule.outputs.managedIdentityOutput.objectId : ''
managedIdentityObjectId: databaseType == 'PostgreSQL'
? managedIdentityModule.outputs.managedIdentityOutput.objectId
: ''
}
}

Expand Down Expand Up @@ -877,13 +879,15 @@ module adminweb './app/adminweb.bicep' = if (hostingModel == 'code') {
LOGLEVEL: logLevel
DATABASE_TYPE: databaseType
},
databaseType == 'PostgreSQL' ? {
AZURE_POSTGRESQL_INFO: string({
host: postgresDBModule.outputs.postgresDbOutput.postgreSQLServerName
dbname: postgresDBModule.outputs.postgresDbOutput.postgreSQLDatabaseName
user: adminWebsiteName
})
} : {}
databaseType == 'PostgreSQL'
? {
AZURE_POSTGRESQL_INFO: string({
host: postgresDBModule.outputs.postgresDbOutput.postgreSQLServerName
dbname: postgresDBModule.outputs.postgresDbOutput.postgreSQLDatabaseName
user: adminWebsiteName
})
}
: {}
)
}
}
Expand Down Expand Up @@ -961,13 +965,15 @@ module adminweb_docker './app/adminweb.bicep' = if (hostingModel == 'container')
LOGLEVEL: logLevel
DATABASE_TYPE: databaseType
},
databaseType == 'PostgreSQL' ? {
AZURE_POSTGRESQL_INFO: string({
host: postgresDBModule.outputs.postgresDbOutput.postgreSQLServerName
dbname: postgresDBModule.outputs.postgresDbOutput.postgreSQLDatabaseName
user: '${adminWebsiteName}-docker'
})
} : {}
databaseType == 'PostgreSQL'
? {
AZURE_POSTGRESQL_INFO: string({
host: postgresDBModule.outputs.postgresDbOutput.postgreSQLServerName
dbname: postgresDBModule.outputs.postgresDbOutput.postgreSQLDatabaseName
user: '${adminWebsiteName}-docker'
})
}
: {}
)
}
}
Expand Down Expand Up @@ -1067,13 +1073,15 @@ module function './app/function.bicep' = if (hostingModel == 'code') {
AZURE_SEARCH_TOP_K: azureSearchTopK
DATABASE_TYPE: databaseType
},
databaseType == 'PostgreSQL' ? {
AZURE_POSTGRESQL_INFO: string({
host: postgresDBModule.outputs.postgresDbOutput.postgreSQLServerName
dbname: postgresDBModule.outputs.postgresDbOutput.postgreSQLDatabaseName
user: functionName
})
} : {}
databaseType == 'PostgreSQL'
? {
AZURE_POSTGRESQL_INFO: string({
host: postgresDBModule.outputs.postgresDbOutput.postgreSQLServerName
dbname: postgresDBModule.outputs.postgresDbOutput.postgreSQLDatabaseName
user: functionName
})
}
: {}
)
}
}
Expand Down Expand Up @@ -1138,13 +1146,15 @@ module function_docker './app/function.bicep' = if (hostingModel == 'container')
AZURE_SEARCH_TOP_K: azureSearchTopK
DATABASE_TYPE: databaseType
},
databaseType == 'PostgreSQL' ? {
AZURE_POSTGRESQL_INFO: string({
host: postgresDBModule.outputs.postgresDbOutput.postgreSQLServerName
dbname: postgresDBModule.outputs.postgresDbOutput.postgreSQLDatabaseName
user: '${functionName}-docker'
})
} : {}
databaseType == 'PostgreSQL'
? {
AZURE_POSTGRESQL_INFO: string({
host: postgresDBModule.outputs.postgresDbOutput.postgreSQLServerName
dbname: postgresDBModule.outputs.postgresDbOutput.postgreSQLDatabaseName
user: '${functionName}-docker'
})
}
: {}
)
}
}
Expand Down
12 changes: 6 additions & 6 deletions infra/main.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"_generator": {
"name": "bicep",
"version": "0.30.23.60470",
"templateHash": "8207330457159201967"
"templateHash": "15536398457562727557"
}
},
"parameters": {
Expand Down Expand Up @@ -312,7 +312,7 @@
"prompt_flow"
],
"metadata": {
"description": "Orchestration strategy: openai_function or semantic_kernel or langchain str. If you use a old version of turbo (0301), please select langchain"
"description": "Orchestration strategy: openai_function or semantic_kernel or langchain str. If you use an old version of turbo (0301), please select langchain. If the database type is PostgreSQL, set this to semantic_kernel."
}
},
"conversationFlow": {
Expand All @@ -323,7 +323,7 @@
"byod"
],
"metadata": {
"description": "Chat conversation type: custom or byod."
"description": "Chat conversation type: custom or byod. If the database type is PostgreSQL, set this to custom."
}
},
"azureOpenAITemperature": {
Expand Down Expand Up @@ -648,11 +648,11 @@
},
"rgName": "[format('rg-{0}', parameters('environmentName'))]",
"keyVaultName": "[format('kv-{0}', parameters('resourceToken'))]",
"baseUrl": "https://raw.githubusercontent.com/Fr4nc3/chat-with-your-data-solution-accelerator/main/",
"baseUrl": "https://raw.githubusercontent.com/Azure-Samples/chat-with-your-data-solution-accelerator/main/",
"azureOpenAIModelInfo": "[string(createObject('model', parameters('azureOpenAIModel'), 'modelName', parameters('azureOpenAIModelName'), 'modelVersion', parameters('azureOpenAIModelVersion')))]",
"azureOpenAIEmbeddingModelInfo": "[string(createObject('model', parameters('azureOpenAIEmbeddingModel'), 'modelName', parameters('azureOpenAIEmbeddingModelName'), 'modelVersion', parameters('azureOpenAIEmbeddingModelVersion')))]",
"appversion": "devpostgre",
"registryName": "cwydcontainerregpk",
"appversion": "latest",
"registryName": "fruoccopublic",
"defaultOpenAiDeployments": [
{
"name": "[parameters('azureOpenAIModel')]",
Expand Down
Loading