Skip to content

Commit

Permalink
Raise error if tensorflow_text is not found (#2427)
Browse files Browse the repository at this point in the history
  • Loading branch information
sampathweb authored Apr 29, 2024
1 parent 669b054 commit fcf9576
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from keras_nlp.layers import StartEndPacker
except ImportError:
keras_nlp = None
StartEndPacker = None


@keras_cv_export("keras_cv.models.feature_extractor.CLIPProcessor")
Expand Down
12 changes: 11 additions & 1 deletion keras_cv/src/models/feature_extractor/clip/clip_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,18 @@
# limitations under the License.
import regex as re
import tensorflow as tf
import tensorflow_text as tf_text

try:
import tensorflow_text as tf_text
except ImportError:
tf_text = None

try:
import keras_nlp
from keras_nlp.tokenizers import BytePairTokenizer
except ImportError:
keras_nlp = None
BytePairTokenizer = object

# As Python and TF handle special spaces differently, we need to
# manually handle special spaces during string split.
Expand All @@ -41,6 +46,11 @@ def split_strings_for_bpe(inputs, unsplittable_tokens=None):
# support lookahead match, we use an alternative: insert a special
# token "६" before leading space of non-space characters and after the
# trailing space, e.g., " keras" will be "६ keras".
if tf_text is None:
raise ImportError(
"BytePairTokenization requires `tensorflow_text`."
"Please install with `pip install tensorflow_text`."
)
inputs = tf.strings.regex_replace(
inputs, rf"( )([^\s{SPECIAL_WHITESPACES}])", r"६\1\2"
)
Expand Down

0 comments on commit fcf9576

Please sign in to comment.