Skip to content

Commit

Permalink
refactoring: remove redundant document marker token id
Browse files — browse the repository at this point in the history
  • Loading branch information
joein committed Feb 6, 2025
1 parent c0fedd7 commit 9062cfe
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions fastembed/late_interaction_multimodal/colpali.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,15 @@


class ColPali(LateInteractionMultimodalEmbeddingBase, OnnxMultimodalModel[np.ndarray]):
DOCUMENT_MARKER_TOKEN_ID = 2
QUERY_PREFIX = "Query: "
BOS_TOKEN = "<s>"
PAD_TOKEN = "<pad>"
QUERY_MARKER_TOKEN_ID = [2, 5098]
IMAGE_PLACEHOLDER_SIZE = (3, 448, 448)
EMPTY_TEXT_PLACEHOLDER = np.array(
[257152] * 1024 + [2, 50721, 573, 2416, 235265, 108]
) # This is a tokenization of '<image>' * 1024 + '<bos>Describe the image.\n' line which is used as placeholder while processing just image
) # This is a tokenization of '<image>' * 1024 + '<bos>Describe the image.\n' line which is used as placeholder
# while processing an image
EVEN_ATTENTION_MASK = np.array([1] * 1030)

def __init__(
Expand Down

0 comments on commit 9062cfe

Please sign in to comment.