Update main.py
main.py CHANGED
@@ -66,12 +66,13 @@ attribution and actions without speculation.""",
     attention_mask = inputs["attention_mask"].to(device)
 
     # Forward pass through the model
-
-
-
-
-
-
+    with torch.inference_mode():
+        output = text_encoder(
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            output_hidden_states=True,
+            use_cache=False,
+        )
 
     # Only use outputs from intermediate layers and stack them
     out = torch.stack([output.hidden_states[k] for k in hidden_states_layers], dim=1)
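The change wraps the encoder forward pass in torch.inference_mode(), which disables autograd tracking outright (a stricter, slightly faster variant of torch.no_grad()), requests output_hidden_states=True so every layer's activations are available, and passes use_cache=False since no generation happens here. A minimal self-contained sketch of the same pattern, assuming a generic Hugging Face encoder; bert-base-uncased and the hidden_states_layers values below are illustrative stand-ins, not the model this Space actually loads:

import torch
from transformers import AutoModel, AutoTokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"

# Stand-in encoder; the Space's real text_encoder is defined elsewhere in main.py.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
text_encoder = AutoModel.from_pretrained("bert-base-uncased").to(device).eval()

# Assumed example: which intermediate layers to keep.
hidden_states_layers = [4, 8, 11]

inputs = tokenizer("a short test prompt", return_tensors="pt")
input_ids = inputs["input_ids"].to(device)
attention_mask = inputs["attention_mask"].to(device)

# inference_mode disables gradient tracking entirely, saving memory and time
# on a serve-only code path.
with torch.inference_mode():
    output = text_encoder(
        input_ids=input_ids,
        attention_mask=attention_mask,
        output_hidden_states=True,
        use_cache=False,  # no generation here, so KV caching is useless
    )

# output.hidden_states is a tuple of (num_layers + 1) tensors, each of shape
# (batch, seq_len, hidden); stacking the selected layers along dim=1 yields
# (batch, len(hidden_states_layers), seq_len, hidden).
out = torch.stack([output.hidden_states[k] for k in hidden_states_layers], dim=1)
print(out.shape)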
@@ -136,8 +137,13 @@ def read_root():
 
 @app.get("/predict")
 def predict(prompt: str = Query(...)):
-
+    prompt_embeds, text_ids = encode_prompt(
         prompt=prompt,
         device=device,
     )
-    return {
+    return {
+        "response": {
+            "prompt_embeds": prompt_embeds.cpu().tolist(),
+            "text_ids": text_ids.cpu().tolist()
+        }
+    }
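The second hunk fills in the /predict handler: encode_prompt now returns both prompt_embeds and text_ids, and the tensors are converted with .cpu().tolist() so FastAPI can JSON-encode them. A runnable sketch of the same endpoint shape; encode_prompt is stubbed here with placeholder shapes and is not the Space's real implementation:

import torch
from fastapi import FastAPI, Query

app = FastAPI()
device = "cpu"  # assumed for the sketch

def encode_prompt(prompt: str, device: str):
    # Placeholder for the real encoder call; shapes are illustrative only.
    prompt_embeds = torch.zeros(1, 4, 8, device=device)
    text_ids = torch.zeros(1, 4, dtype=torch.long, device=device)
    return prompt_embeds, text_ids

@app.get("/predict")
def predict(prompt: str = Query(...)):
    prompt_embeds, text_ids = encode_prompt(
        prompt=prompt,
        device=device,
    )
    # .cpu().tolist() moves tensors off the accelerator and turns them into
    # nested Python lists, which FastAPI can serialize to JSON.
    return {
        "response": {
            "prompt_embeds": prompt_embeds.cpu().tolist(),
            "text_ids": text_ids.cpu().tolist()
        }
    }

Served with uvicorn main:app, a request like curl 'http://localhost:8000/predict?prompt=hello' returns the nested JSON payload. Note that .tolist() on large embedding tensors produces sizeable responses; a service under load might prefer a binary encoding instead.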