Home » Server Options » Text & interMedia » Oracle Text Search - Handling special characters and blank search term (Oracle DB v11.2)
Oracle Text Search - Handling special characters and blank search term [message #686793] Tue, 10 January 2023 08:12 Go to next message
OraDev16
Messages: 8
Registered: October 2021
Junior Member
Hi All,

I've tried the following in Oracle DB 11.2:

drop table ot_test;

create table ot_test(

id number primary key,

id_f varchar2(20),

prod_name varchar2(1000),

search_keywords varchar2(2000),

prod_description_short varchar2(2000));



begin

insert into ot_test values(1,101,'Laptops and Desktops','laptops,desktop','A laptop computer or notebook computer, also laptop or notebook for short, is a small, portable personal computer (PC) that is designed to be practically placed on the user''s lap, hence the name');

insert into ot_test values(2,102,'Antivirus Softwares','Antivirus,software','Antivirus software (abbreviated to AV software), also known as anti-malware, is a computer program used to prevent, detect, and remove malware. Antivirus software was originally developed to detect and remove computer viruses, hence the name. However, with the proliferation of other malware, antivirus software started to protect against other computer threats.');

insert into ot_test values(3,103,'sAAS products','saas, products','A SaaS product is an internet software that is accessible to all users. SaaS products are everywhere.');

insert into ot_test values(4,104,'Cloud computing','Cloud,computing','Cloud computing is the on-demand availability of computer system resources, especially data storage and computing power, without direct active management by the user.');

insert into ot_test values(5,105,'Optical mouse','Optical, mouse','An optical mouse is a computer mouse which uses a light source, typically a light-emitting diode, and a light detector, such as an array of photodiodes, to detect movement relative to a surface.');

end;

/

commit;

select * from ot_test;



create index ot_id_f on ot_test(id_f) indextype is ctxsys.context parameters ('sync (on commit)');

create index ot_prod_name on ot_test(prod_name) indextype is ctxsys.context parameters ('sync (on commit)');

create index ot_search_keywords on ot_test(search_keywords) indextype is ctxsys.context parameters ('sync (on commit)');

create index ot_prod_description_short on ot_test(prod_description_short) indextype is ctxsys.context parameters ('sync (on commit)');



select * from user_indexes where table_name = 'OT_TEST';



SELECT id,

id_f,

prod_name,

search_keywords,

prod_description_short,

score(1) sc_1,

score(2) sc_2,

score(3) sc_3,

score(4) sc_4

FROM ot_test

where

(

(

(

:P2_SEARCH_TERM is not null and

contains ( id_f,

'<query>

<textquery lang="ENGLISH" grammar="CONTEXT">' ||:P2_SEARCH_TERM || '

<progression>

<seq><rewrite>transform((TOKENS, "{", "}", " "))</rewrite></seq>

<seq><rewrite>transform((TOKENS, "{", "}", " NEAR "))</rewrite></seq>

</progression>

</textquery>

<score datatype="FLOAT" algorithm="COUNT"/>

</query>',

1 ) > 0

)

or :P2_SEARCH_TERM is null

)

or

(

(

:P2_SEARCH_TERM is not null and

contains ( prod_name,

'<query>

<textquery lang="ENGLISH" grammar="CONTEXT">' || :P2_SEARCH_TERM || '

<progression>

<seq><rewrite>transform((TOKENS, "{", "}", " "))</rewrite></seq>

<seq><rewrite>transform((TOKENS, "{", "}", " NEAR "))</rewrite></seq>

</progression>

</textquery>

<score datatype="FLOAT" algorithm="COUNT"/>

</query>',

2 ) > 0

)

or :P2_SEARCH_TERM is null

)

or

(

(

:P2_SEARCH_TERM is not null and

contains ( search_keywords,

'<query>

<textquery lang="ENGLISH" grammar="CONTEXT">' || :P2_SEARCH_TERM || '

<progression>

<seq><rewrite>transform((TOKENS, "{", "}", " "))</rewrite></seq>

<seq><rewrite>transform((TOKENS, "{", "}", " NEAR "))</rewrite></seq>

</progression>

</textquery>

<score datatype="FLOAT" algorithm="COUNT"/>

</query>',

3 ) > 0

)

or :P2_SEARCH_TERM is null

)

or

(

(

:P2_SEARCH_TERM is not null and

contains ( prod_description_short,

'<query>

<textquery lang="ENGLISH" grammar="CONTEXT">' || :P2_SEARCH_TERM || '

<progression>

<seq><rewrite>transform((TOKENS, "{", "}", " "))</rewrite></seq>

<seq><rewrite>transform((TOKENS, "{", "}", " NEAR "))</rewrite></seq>

</progression>

</textquery>

<score datatype="FLOAT" algorithm="COUNT"/>

</query>',

4 ) > 0

)

or :P2_SEARCH_TERM is null

)



)

order by sc_1 desc, sc_2 desc, sc_3 desc, sc_4 desc;



Questions:

The query is returning results as expected but If I provide blank (null) search term or special characters like (@, #, $ etc.); then it returns ORA-20000: Oracle Text error. I need help in solving these scenarios.
Re: Oracle Text Search - Handling special characters and blank search term [message #686794 is a reply to message #686793] Tue, 10 January 2023 10:35 Go to previous messageGo to next message
Michel Cadot
Messages: 68675
Registered: March 2007
Location: Nanterre, France, http://...
Senior Member
Account Moderator

Welcome to the forum.
Please read the OraFAQ Forum Guide and How to use [code] tags and make your code easier to read.
Indent the code, use code tags and align the columns in result.

Also always post your Oracle version, with 4 decimals (query v$version), as often solution depends on it.

The complete error message is (from my test):
ORA-20000: Oracle Text error:
DRG-50926: invalid text query string
Please confirm.

Re: Oracle Text Search - Handling special characters and blank search term [message #686795 is a reply to message #686794] Tue, 10 January 2023 12:15 Go to previous messageGo to next message
Barbara Boehmer
Messages: 9097
Registered: November 2002
Location: California, USA
Senior Member
There are a number of issues here. One issue is that Oracle Text does not know what to do with the special characters. Another is that if the text query is not executed and there is no score then it does not know what to do when you have selected that nonexistsent score. In the following, I have used your data and indexes, but a simplified query using just one of your columns and corresponding score. You should be able to modify the rest of your query in the same manner. I have used translate and replace to remove the three special characters that you specified and spaces and check for null. I have used just the translate to replace the special characters inside the query, but left the spaces. I have also used a case statement to allow for when there is no score. You will need to add any other special characters and their replacements to each translate statement and you will need to add one case statement for each score. Please see the comment lines for what goes where. Another alternative might be to clean up your variable prior to passing it or within a separate select statement.

SCOTT@orcl_12.1.0.2.0> VARIABLE P2_SEARCH_TERM VARCHAR2(30)
SCOTT@orcl_12.1.0.2.0> EXEC :P2_SEARCH_TERM := 'CLOUD'

PL/SQL procedure successfully completed.

SCOTT@orcl_12.1.0.2.0> SELECT prod_description_short,
  2  -- add case statement for each score using replace and translate
  3  	    CASE
  4  	      WHEN REPLACE (TRANSLATE (:P2_SEARCH_TERM, '$#@', '   '), ' ', '') is not null
  5  	      THEN score(4)
  6  	      ELSE 0
  7  	    END sc_4
  8  FROM   ot_test
  9  where  (
 10  (
 11  (
 12  -- add replace and translate to replace special characters and spaces and check for null
 13  REPLACE (TRANSLATE (:P2_SEARCH_TERM, '$#@', '   '), ' ', '') is not null and
 14  contains ( prod_description_short,
 15  -- add translate to remove special characters but leave spaces
 16  '<query>
 17  <textquery lang="ENGLISH" grammar="CONTEXT">' || TRANSLATE (:P2_SEARCH_TERM, '$#@', '   ') || '
 18  <progression>
 19  <seq><rewrite>transform((TOKENS, "{", "}", " "))</rewrite></seq>
 20  <seq><rewrite>transform((TOKENS, "{", "}", " NEAR "))</rewrite></seq>
 21  </progression>
 22  </textquery>
 23  <score datatype="FLOAT" algorithm="COUNT"/>
 24  </query>',
 25  4 ) > 0
 26  )
 27  -- add replace and translate to replace special characters and spaces and check for null
 28  or REPLACE (TRANSLATE (:P2_SEARCH_TERM, '$#@', '	'), ' ', '') is null
 29  )
 30  )
 31  order by sc_4 desc
 32  /

PROD_DESCRIPTION_SHORT
--------------------------------------------------------------------------------
      SC_4
----------
Cloud computing is the on-demand availability of computer system resources, espe
cially data storage and computing power, without direct active management by the
 user.
      50.5


1 row selected.

SCOTT@orcl_12.1.0.2.0> EXEC :P2_SEARCH_TERM := NULL

PL/SQL procedure successfully completed.

SCOTT@orcl_12.1.0.2.0> /

PROD_DESCRIPTION_SHORT
--------------------------------------------------------------------------------
      SC_4
----------
A laptop computer or notebook computer, also laptop or notebook for short, is a
small, portable personal computer (PC) that is designed to be practically placed
 on the user's lap, hence the name
         0

Antivirus software (abbreviated to AV software), also known as anti-malware, is
a computer program used to prevent, detect, and remove malware. Antivirus softwa
re was originally developed to detect and remove computer viruses, hence the nam
e. However, with the proliferation of other malware, antivirus software started
to protect against other computer threats.
         0

An optical mouse is a computer mouse which uses a light source, typically a ligh
t-emitting diode, and a light detector, such as an array of photodiodes, to dete
ct movement relative to a surface.
         0

Cloud computing is the on-demand availability of computer system resources, espe
cially data storage and computing power, without direct active management by the
 user.
         0

A SaaS product is an internet software that is accessible to all users. SaaS pro
ducts are everywhere.
         0


5 rows selected.

SCOTT@orcl_12.1.0.2.0> EXEC :P2_SEARCH_TERM := 'cloud@#$computing'

PL/SQL procedure successfully completed.

SCOTT@orcl_12.1.0.2.0> /

PROD_DESCRIPTION_SHORT
--------------------------------------------------------------------------------
      SC_4
----------
Cloud computing is the on-demand availability of computer system resources, espe
cially data storage and computing power, without direct active management by the
 user.
      50.5


1 row selected.

[Updated on: Tue, 10 January 2023 12:21]

Report message to a moderator

Re: Oracle Text Search - Handling special characters and blank search term [message #686801 is a reply to message #686795] Wed, 11 January 2023 23:06 Go to previous messageGo to next message
OraDev16
Messages: 8
Registered: October 2021
Junior Member
Thank you Barbara. it's working perfectly.
Re: Oracle Text Search - Handling special characters and blank search term [message #686802 is a reply to message #686794] Wed, 11 January 2023 23:12 Go to previous messageGo to next message
OraDev16
Messages: 8
Registered: October 2021
Junior Member
Hello Michel,
Thank you for allowing my post in the forum; for future queries i'll follow the forum guidelines. Yes, following are the error messages:
ORA-20000: Oracle Text error:
DRG-50926: invalid text query string

And, while searching for any special character which is not mentioned in the query; getting the DRG-50962 error as well; i think if i add them in the query then this error will not occur.

Thanks.
Re: Oracle Text Search - Handling special characters and blank search term [message #686818 is a reply to message #686795] Mon, 16 January 2023 07:38 Go to previous messageGo to next message
OraDev16
Messages: 8
Registered: October 2021
Junior Member
Hi Barbara,
I've inserted the following record:
insert into ot_test values(6,106,'ai-chatbot','ai-chatbot,chatGPT','ChatGPT is a chatbot launched by OpenAI in November 2022. It is built on top of OpenAI''s GPT-3 family of large language models, and is fine-tuned with both supervised and reinforcement learning techniques.');
And, ran the following query for a scenario where a special character is part of the search term (Search Term : GPT-3 ):

SELECT prod_description_short,
    -- add case statement for each score using replace and translate
    	    CASE
    	      WHEN REPLACE (TRANSLATE (:P2_SEARCH_TERM, '$#@', '   '), ' ', '') is not null
    	      THEN score(4)
    	      ELSE 0
    	    END sc_4
    FROM   ot_test
    where  (
   (
   (
   -- add replace and translate to replace special characters and spaces and check for null
   REPLACE (TRANSLATE (:P2_SEARCH_TERM, '$#@', '   '), ' ', '') is not null and
   contains ( prod_description_short,
   -- add translate to remove special characters but leave spaces
   '<query>
   <textquery lang="ENGLISH" grammar="CONTEXT">' || TRANSLATE (:P2_SEARCH_TERM, '$#@', '   ') || '
   <progression>
   <seq><rewrite>transform((TOKENS, "{", "}", " "))</rewrite></seq>
   <seq><rewrite>transform((TOKENS, "{", "}", " NEAR "))</rewrite></seq>
   </progression>
   </textquery>
   <score datatype="FLOAT" algorithm="COUNT"/>
   </query>',
   4 ) > 0
   )
   -- add replace and translate to replace special characters and spaces and check for null
   or REPLACE (TRANSLATE (:P2_SEARCH_TERM, '$#@', '	'), ' ', '') is null
   )
   )
   order by sc_4 desc
   /
Error:
ORA-20000: Oracle Text error:
DRG-50962: Query operators are not allowed in transform input string
20000. 00000 - "%s"
*Cause: The stored procedure 'raise_application_error'
was called which causes this error to be generated.
*Action: Correct the problem as described in the error message or contact
the application administrator or DBA for more information.


Thanks.

[Updated on: Mon, 16 January 2023 07:41]

Report message to a moderator

Re: Oracle Text Search - Handling special characters and blank search term [message #686819 is a reply to message #686818] Mon, 16 January 2023 08:06 Go to previous messageGo to next message
Michel Cadot
Messages: 68675
Registered: March 2007
Location: Nanterre, France, http://...
Senior Member
Account Moderator

As Barbara said:

Quote:
I have used translate and replace to remove the three special characters that you specified and spaces and check for null.

I think you have to add all characters that don't work in these functions (and so all operators).

[Updated on: Mon, 16 January 2023 08:07]

Report message to a moderator

Re: Oracle Text Search - Handling special characters and blank search term [message #686823 is a reply to message #686819] Mon, 16 January 2023 09:50 Go to previous messageGo to next message
OraDev16
Messages: 8
Registered: October 2021
Junior Member
Hi Michel,
I've added all the other special characters and symbols as shown below and they are giving desired result. Issue is when the Special characters are part of the search term (Search term I've used is GPT-3 which is present in the 6th record). But the query below is returning "no rows selected".

SELECT prod_description_short,
-- add case statement for each score using replace and translate
CASE
WHEN REPLACE (TRANSLATE (:P2_SEARCH_TERM, '!@#$%^&*()_-|\{}[]:;"<>,.?/', ' '), ' ', '') is not null
THEN score(4)
ELSE 0
END sc_4
FROM ot_test
where (
(
(
-- add replace and translate to replace special characters and spaces and check for null
REPLACE (TRANSLATE (:P2_SEARCH_TERM, '!@#$%^&*()_-|\{}[]:;"<>,.?/', ' '), ' ', '') is not null and
contains ( prod_description_short,
-- add translate to remove special characters but leave spaces
'<query>
<textquery lang="ENGLISH" grammar="CONTEXT">' || TRANSLATE (:P2_SEARCH_TERM, '!@#$%^&*()_-|\{}[]:;"<>,.?/', ' ') || '
<progression>
<seq><rewrite>transform((TOKENS, "{", "}", " "))</rewrite></seq>
<seq><rewrite>transform((TOKENS, "{", "}", " NEAR "))</rewrite></seq>
</progression>
</textquery>
<score datatype="FLOAT" algorithm="COUNT"/>
</query>',
4 ) > 0
)
-- add replace and translate to replace special characters and spaces and check for null
or REPLACE (TRANSLATE (:P2_SEARCH_TERM, '!@#$%^&*()_-|\{}[]:;"<>,.?/', ' '), ' ', '') is null
)
)
order by sc_4 desc
/

Thanks.
Re: Oracle Text Search - Handling special characters and blank search term [message #686825 is a reply to message #686823] Mon, 16 January 2023 10:28 Go to previous messageGo to next message
Michel Cadot
Messages: 68675
Registered: March 2007
Location: Nanterre, France, http://...
Senior Member
Account Moderator

I'm afraid you have to wait for Barbara to get a solution to your issue.

Re: Oracle Text Search - Handling special characters and blank search term [message #686827 is a reply to message #686825] Mon, 16 January 2023 10:31 Go to previous messageGo to next message
OraDev16
Messages: 8
Registered: October 2021
Junior Member
ok. And, thanks for your prompt response.
Re: Oracle Text Search - Handling special characters and blank search term [message #686829 is a reply to message #686827] Mon, 16 January 2023 11:28 Go to previous messageGo to next message
Barbara Boehmer
Messages: 9097
Registered: November 2002
Location: California, USA
Senior Member
With the translate function, it translates each occurrence of the character in the second parameter to the corresponding character in the third parameter, but just removes others. In the original example, there were only three special characters, so I used only 3 spaces. If you use 27 characters with only 3 spaces, then it removes the 4th through 27th characters instead of replacing them with spaces, as shown below.

SCOTT@orcl_12.1.0.2.0> VARIABLE P2_SEARCH_TERM VARCHAR2(30)
SCOTT@orcl_12.1.0.2.0> EXEC :P2_SEARCH_TERM := 'GPT-3'

PL/SQL procedure successfully completed.

SCOTT@orcl_12.1.0.2.0> SELECT TRANSLATE (:P2_SEARCH_TERM, '!@#$%^&*()_-|\{}[]:;"<>,.?/', '   ') FROM DUAL
  2  /

TRANSLATE(:P2_SEARCH_TERM,'!@#$%^&*()_-|\{}[]:;"<>,.?/','')
--------------------------------------------------------------------------------
GPT3

1 row selected.
If you use 27 spaces instead, then it replaces the hyphen with a space as shown below.
SCOTT@orcl_12.1.0.2.0> SELECT TRANSLATE (:P2_SEARCH_TERM, '!@#$%^&*()_-|\{}[]:;"<>,.?/', '			     ') FROM DUAL
  2  /

TRANSLATE(:P2_SEARCH_TERM,'!@#$%^&*()_-|\{}[]:;"<>,.?/','')
--------------------------------------------------------------------------------
GPT 3

1 row selected.

When it is indexed, by default, the GPT and 3 are indexed as separate tokens, so with the 27 spaces the query can now find those tokens as shown below.
SCOTT@orcl_12.1.0.2.0> 
SCOTT@orcl_12.1.0.2.0> SELECT prod_description_short,
  2  		 CASE
  3  		   WHEN REPLACE (TRANSLATE (:P2_SEARCH_TERM,
  4  		     '!@#$%^&*()_-|\{}[]:;"<>,.?/', '	'), ' ', '') is not null
  5  		   THEN score(4)
  6  		   ELSE 0
  7  		 END sc_4
  8  	 FROM	ot_test
  9  	 where	(
 10  	(
 11  	(
 12  	REPLACE (TRANSLATE (:P2_SEARCH_TERM, '!@#$%^&*()_-|\{}[]:;"<>,.?/', '	'), ' ', '') is not null and
 13  	contains ( prod_description_short,
 14  	-- add translate to remove special characters but leave spaces
 15  	-- if you translate 27 characters, then you need 27 spaces
 16  	'<query>
 17  	<textquery lang="ENGLISH" grammar="CONTEXT">' ||
 18  	TRANSLATE (:P2_SEARCH_TERM, '!@#$%^&*()_-|\{}[]:;"<>,.?/', '			       ') || '
 19  	<progression>
 20  	<seq><rewrite>transform((TOKENS, "{", "}", " "))</rewrite></seq>
 21  	<seq><rewrite>transform((TOKENS, "{", "}", " NEAR "))</rewrite></seq>
 22  	</progression>
 23  	</textquery>
 24  	<score datatype="FLOAT" algorithm="COUNT"/>
 25  	</query>',
 26  	4 ) > 0
 27  	)
 28  	-- add replace and translate to replace special characters and spaces and check for null
 29  	or REPLACE (TRANSLATE (:P2_SEARCH_TERM, '!@#$%^&*()_-|\{}[]:;"<>,.?/', '     '), ' ', '') is null
 30  	)
 31  	)
 32  	order by sc_4 desc
 33  	/

PROD_DESCRIPTION_SHORT
--------------------------------------------------------------------------------
      SC_4
----------
ChatGPT is a chatbot launched by OpenAI in November 2022. It is built on top of
OpenAI's GPT-3 family of large language models, and is fine-tuned with both supe
rvised and reinforcement learning techniques.
      50.5


1 row selected.

SCOTT@orcl_12.1.0.2.0> 
Re: Oracle Text Search - Handling special characters and blank search term [message #686830 is a reply to message #686829] Mon, 16 January 2023 11:49 Go to previous message
OraDev16
Messages: 8
Registered: October 2021
Junior Member
Thank you Barbara. It's giving the desired result.

[Updated on: Mon, 16 January 2023 11:50]

Report message to a moderator

Previous Topic: ORACLE SQL POSSIBE BUG
Next Topic: Materialized View with Context Index
Goto Forum:
  


Current Time: Sat Sep 07 18:36:24 CDT 2024