danulr05 committed on
Commit
6405203
·
verified ·
1 Parent(s): fd7a2f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -21
app.py CHANGED
@@ -50,8 +50,8 @@ def get_language_specific_data(proposal_data, field, language='en'):
50
 
51
  # If it's the new multi-language format, return language-specific data
52
  if isinstance(proposal_data.get(field), dict):
53
- return proposal_data.get(field, {}).get(language,
54
- proposal_data.get(field, {}).get('en', ''))
55
 
56
  return ''
57
 
@@ -152,22 +152,27 @@ def semantic_search(query: str, top_k=1, category_filter=None, language='en'):
152
  category = get_language_specific_data(proposal_data, "category", language)
153
  thumb_url = metadata.get("thumbUrl", "")
154
 
155
- result = {
156
- "title": title,
157
- "summary": summary,
158
- "costLKR": costLKR,
159
- "category": category,
160
- "pdfUrl": f"assets/pdfs/{file_path}" if file_path else "",
161
- "thumbUrl": f"assets/thumbs/{thumb_url}" if thumb_url else "",
162
- "score": score,
163
- "relevance_percentage": int(score * 100),
164
- "file_path": file_path,
165
- "id": match["id"],
166
- "content": metadata.get("content", "") # Add the actual content
167
- }
168
-
169
- results.append(result)
170
- doc_count += 1
 
 
 
 
 
171
  break
172
 
173
  return results
@@ -233,8 +238,9 @@ def get_all_proposals(category_filter=None, language='en'):
233
  thumb_url = metadata.get("thumbUrl", "")
234
 
235
  # Only include documents that have meaningful content in the requested language
236
- # Skip documents where title is empty or "Unknown", but allow "No summary available"
237
- if (title and title.strip() and title not in ["Unknown", "Unknown Title"]):
 
238
 
239
  result = {
240
  "title": title,
@@ -340,7 +346,12 @@ def get_categories():
340
  for file_path, metadata in DYNAMIC_METADATA.items():
341
  category = metadata.get("category")
342
  if category:
343
- categories.add(category)
 
 
 
 
 
344
 
345
  # If no categories from metadata, fallback to Pinecone
346
  if not categories:
 
50
 
51
  # If it's the new multi-language format, return language-specific data
52
  if isinstance(proposal_data.get(field), dict):
53
+ # Only return data for the requested language, no fallback
54
+ return proposal_data.get(field, {}).get(language, '')
55
 
56
  return ''
57
 
 
152
  category = get_language_specific_data(proposal_data, "category", language)
153
  thumb_url = metadata.get("thumbUrl", "")
154
 
155
+ # Only include documents that have meaningful content in the requested language
156
+ # Skip documents where title and summary are empty or "Unknown"/"No summary available"
157
+ if (title and title.strip() and title not in ["Unknown", "Unknown Title", ""] and
158
+ summary and summary.strip() and summary not in ["No summary available", ""]):
159
+
160
+ result = {
161
+ "title": title,
162
+ "summary": summary,
163
+ "costLKR": costLKR,
164
+ "category": category,
165
+ "pdfUrl": f"assets/pdfs/{file_path}" if file_path else "",
166
+ "thumbUrl": f"assets/thumbs/{thumb_url}" if thumb_url else "",
167
+ "score": score,
168
+ "relevance_percentage": int(score * 100),
169
+ "file_path": file_path,
170
+ "id": match["id"],
171
+ "content": metadata.get("content", "") # Add the actual content
172
+ }
173
+
174
+ results.append(result)
175
+ doc_count += 1
176
  break
177
 
178
  return results
 
238
  thumb_url = metadata.get("thumbUrl", "")
239
 
240
  # Only include documents that have meaningful content in the requested language
241
+ # Skip documents where title and summary are empty or "Unknown"/"No summary available"
242
+ if (title and title.strip() and title not in ["Unknown", "Unknown Title", ""] and
243
+ summary and summary.strip() and summary not in ["No summary available", ""]):
244
 
245
  result = {
246
  "title": title,
 
346
  for file_path, metadata in DYNAMIC_METADATA.items():
347
  category = metadata.get("category")
348
  if category:
349
+ # Handle both string and dict formats
350
+ if isinstance(category, dict):
351
+ # Extract English category from dict
352
+ category = category.get("en", "")
353
+ if category:
354
+ categories.add(category)
355
 
356
  # If no categories from metadata, fallback to Pinecone
357
  if not categories: