comparison src/DLD-FUNCTIONS/regexp.cc @ 7242:e4398e3903be

[project @ 2007-12-03 23:52:07 by dbateman]
author dbateman
date Mon, 03 Dec 2007 23:52:07 +0000
parents 7e248dd1e6d5
children b166043585a8 35d5c824f1eb
comparison
equal deleted inserted replaced
7241:da540e715d05 7242:e4398e3903be
925 %! assert (isempty(te{1})) 925 %! assert (isempty(te{1}))
926 %! assert (m{1},'short') 926 %! assert (m{1},'short')
927 %! ## Matlab gives [1,0] here but that seems wrong. 927 %! ## Matlab gives [1,0] here but that seems wrong.
928 %! assert (size(t), [1,1]) 928 %! assert (size(t), [1,1])
929 929
930 %!test 930 %!testif HAVE_PCRE
931 %! ## This test is expected to fail if PCRE is not installed 931 %! ## This test is expected to fail if PCRE is not installed
932 %! if (!isempty(findstr(octave_config_info ("DEFS"),"HAVE_PCRE"))) 932 %! [s, e, te, m, t, nm] = regexp('short test string','(?<word1>\w*t)\s*(?<word2>\w*t)');
933 %! [s, e, te, m, t, nm] = regexp('short test string','(?<word1>\w*t)\s*(?<word2>\w*t)'); 933 %! assert (s,1)
934 %! assert (s,1) 934 %! assert (e,10)
935 %! assert (e,10) 935 %! assert (size(te), [1,1])
936 %! assert (size(te), [1,1]) 936 %! assert (te{1}, [1 5; 7, 10])
937 %! assert (te{1}, [1 5; 7, 10]) 937 %! assert (m{1},'short test')
938 %! assert (m{1},'short test') 938 %! assert (size(t),[1,1])
939 %! assert (size(t),[1,1]) 939 %! assert (t{1}{1},'short')
940 %! assert (t{1}{1},'short') 940 %! assert (t{1}{2},'test')
941 %! assert (t{1}{2},'test') 941 %! assert (size(nm), [1,1])
942 %! assert (size(nm), [1,1]) 942 %! assert (!isempty(fieldnames(nm)))
943 %! assert (!isempty(fieldnames(nm))) 943 %! assert (sort(fieldnames(nm)),{'word1';'word2'})
944 %! assert (sort(fieldnames(nm)),{'word1';'word2'}) 944 %! assert (nm.word1,'short')
945 %! assert (nm.word1,'short') 945 %! assert (nm.word2,'test')
946 %! assert (nm.word2,'test') 946
947 %! endif 947 %!testif HAVE_PCRE
948
949 %!test
950 %! ## This test is expected to fail if PCRE is not installed 948 %! ## This test is expected to fail if PCRE is not installed
951 %! if (!isempty(findstr(octave_config_info ("DEFS"),"HAVE_PCRE"))) 949 %! [nm, m, te, e, s, t] = regexp('short test string','(?<word1>\w*t)\s*(?<word2>\w*t)', 'names', 'match', 'tokenExtents', 'end', 'start', 'tokens');
952 %! [nm, m, te, e, s, t] = regexp('short test string','(?<word1>\w*t)\s*(?<word2>\w*t)', 'names', 'match', 'tokenExtents', 'end', 'start', 'tokens'); 950 %! assert (s,1)
953 %! assert (s,1) 951 %! assert (e,10)
954 %! assert (e,10) 952 %! assert (size(te), [1,1])
955 %! assert (size(te), [1,1]) 953 %! assert (te{1}, [1 5; 7, 10])
956 %! assert (te{1}, [1 5; 7, 10]) 954 %! assert (m{1},'short test')
957 %! assert (m{1},'short test') 955 %! assert (size(t),[1,1])
958 %! assert (size(t),[1,1]) 956 %! assert (t{1}{1},'short')
959 %! assert (t{1}{1},'short') 957 %! assert (t{1}{2},'test')
960 %! assert (t{1}{2},'test') 958 %! assert (size(nm), [1,1])
961 %! assert (size(nm), [1,1]) 959 %! assert (!isempty(fieldnames(nm)))
962 %! assert (!isempty(fieldnames(nm))) 960 %! assert (sort(fieldnames(nm)),{'word1';'word2'})
963 %! assert (sort(fieldnames(nm)),{'word1';'word2'}) 961 %! assert (nm.word1,'short')
964 %! assert (nm.word1,'short') 962 %! assert (nm.word2,'test')
965 %! assert (nm.word2,'test') 963
966 %! endif 964 %!testif HAVE_PCRE
967
968 %!test
969 %! ## This test is expected to fail if PCRE is not installed 965 %! ## This test is expected to fail if PCRE is not installed
970 %! if (!isempty(findstr(octave_config_info ("DEFS"),"HAVE_PCRE"))) 966 %! [t, nm] = regexp("John Davis\nRogers, James",'(?<first>\w+)\s+(?<last>\w+)|(?<last>\w+),\s+(?<first>\w+)','tokens','names');
971 %! [t, nm] = regexp("John Davis\nRogers, James",'(?<first>\w+)\s+(?<last>\w+)|(?<last>\w+),\s+(?<first>\w+)','tokens','names'); 967 %! assert (size(t), [1,2]);
972 %! assert (size(t), [1,2]); 968 %! assert (t{1}{1},'John');
973 %! assert (t{1}{1},'John'); 969 %! assert (t{1}{2},'Davis');
974 %! assert (t{1}{2},'Davis'); 970 %! assert (t{2}{1},'Rogers');
975 %! assert (t{2}{1},'Rogers'); 971 %! assert (t{2}{2},'James');
976 %! assert (t{2}{2},'James'); 972 %! assert (size(nm), [1,1]);
977 %! assert (size(nm), [1,1]); 973 %! assert (nm.first{1},'John');
978 %! assert (nm.first{1},'John'); 974 %! assert (nm.first{2},'James');
979 %! assert (nm.first{2},'James'); 975 %! assert (nm.last{1},'Davis');
980 %! assert (nm.last{1},'Davis'); 976 %! assert (nm.last{2},'Rogers');
981 %! assert (nm.last{2},'Rogers');
982 %! endif
983 977
984 %!assert(regexp("abc\nabc",'.'),[1:7]) 978 %!assert(regexp("abc\nabc",'.'),[1:7])
985 %!assert(regexp("abc\nabc",'.','dotall'),[1:7]) 979 %!assert(regexp("abc\nabc",'.','dotall'),[1:7])
986 %!test 980 %!testif HAVE_PCRE
987 %! if (!isempty(findstr(octave_config_info ("DEFS"),"HAVE_PCRE"))) 981 %! assert(regexp("abc\nabc",'(?s).'),[1:7])
988 %! assert(regexp("abc\nabc",'(?s).'),[1:7]) 982 %! assert(regexp("abc\nabc",'.','dotexceptnewline'),[1,2,3,5,6,7])
989 %! assert(regexp("abc\nabc",'.','dotexceptnewline'),[1,2,3,5,6,7]) 983 %! assert(regexp("abc\nabc",'(?-s).'),[1,2,3,5,6,7])
990 %! assert(regexp("abc\nabc",'(?-s).'),[1,2,3,5,6,7])
991 %! endif
992 984
993 %!assert(regexp("caseCaSe",'case'),1) 985 %!assert(regexp("caseCaSe",'case'),1)
994 %!assert(regexp("caseCaSe",'case',"matchcase"),1) 986 %!assert(regexp("caseCaSe",'case',"matchcase"),1)
995 %!assert(regexp("caseCaSe",'case',"ignorecase"),[1,5]) 987 %!assert(regexp("caseCaSe",'case',"ignorecase"),[1,5])
996 %!test 988 %!testif HAVE_PCRE
997 %! if (!isempty(findstr(octave_config_info ("DEFS"),"HAVE_PCRE"))) 989 %! assert(regexp("caseCaSe",'(?-i)case'),1)
998 %! assert(regexp("caseCaSe",'(?-i)case'),1) 990 %! assert(regexp("caseCaSe",'(?i)case'),[1,5])
999 %! assert(regexp("caseCaSe",'(?i)case'),[1,5])
1000 %! endif
1001 991
1002 %!assert (regexp("abc\nabc",'c$'),7) 992 %!assert (regexp("abc\nabc",'c$'),7)
1003 %!assert (regexp("abc\nabc",'c$',"stringanchors"),7) 993 %!assert (regexp("abc\nabc",'c$',"stringanchors"),7)
1004 %!test 994 %!testif HAVE_PCRE
1005 %! if (!isempty(findstr(octave_config_info ("DEFS"),"HAVE_PCRE"))) 995 %! assert (regexp("abc\nabc",'(?-m)c$'),7)
1006 %! assert (regexp("abc\nabc",'(?-m)c$'),7) 996 %! assert (regexp("abc\nabc",'c$',"lineanchors"),[3,7])
1007 %! assert (regexp("abc\nabc",'c$',"lineanchors"),[3,7]) 997 %! assert (regexp("abc\nabc",'(?m)c$'),[3,7])
1008 %! assert (regexp("abc\nabc",'(?m)c$'),[3,7])
1009 %! endif
1010 998
1011 %!assert (regexp("this word",'s w'),4) 999 %!assert (regexp("this word",'s w'),4)
1012 %!assert (regexp("this word",'s w','literalspacing'),4) 1000 %!assert (regexp("this word",'s w','literalspacing'),4)
1013 %!test 1001 %!testif HAVE_PCRE
1014 %! if (!isempty(findstr(octave_config_info ("DEFS"),"HAVE_PCRE"))) 1002 %! assert (regexp("this word",'(?-x)s w','literalspacing'),4)
1015 %! assert (regexp("this word",'(?-x)s w','literalspacing'),4) 1003 %! assert (regexp("this word",'s w','freespacing'),zeros(1,0))
1016 %! assert (regexp("this word",'s w','freespacing'),zeros(1,0)) 1004 %! assert (regexp("this word",'(?x)s w'),zeros(1,0))
1017 %! assert (regexp("this word",'(?x)s w'),zeros(1,0))
1018 %! endif
1019 1005
1020 %!error regexp('string', 'tri', 'BadArg'); 1006 %!error regexp('string', 'tri', 'BadArg');
1021 %!error regexp('string'); 1007 %!error regexp('string');
1022 1008
1023 %!assert(regexp({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'},'-'),{6;[1,5,9];zeros(1,0)}) 1009 %!assert(regexp({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'},'-'),{6;[1,5,9];zeros(1,0)})
1115 %! assert (isempty(te{1})) 1101 %! assert (isempty(te{1}))
1116 %! assert (m{1},'ShoRt') 1102 %! assert (m{1},'ShoRt')
1117 %! ## Matlab gives [1,0] here but that seems wrong. 1103 %! ## Matlab gives [1,0] here but that seems wrong.
1118 %! assert (size(t), [1,1]) 1104 %! assert (size(t), [1,1])
1119 1105
1120 %!test 1106 %!testif HAVE_PCRE
1121 %! ## This test is expected to fail if PCRE is not installed 1107 %! ## This test is expected to fail if PCRE is not installed
1122 %! if (!isempty(findstr(octave_config_info ("DEFS"),"HAVE_PCRE"))) 1108 %! [s, e, te, m, t, nm] = regexpi('ShoRt Test String','(?<word1>\w*t)\s*(?<word2>\w*t)');
1123 %! [s, e, te, m, t, nm] = regexpi('ShoRt Test String','(?<word1>\w*t)\s*(?<word2>\w*t)'); 1109 %! assert (s,1)
1124 %! assert (s,1) 1110 %! assert (e,10)
1125 %! assert (e,10) 1111 %! assert (size(te), [1,1])
1126 %! assert (size(te), [1,1]) 1112 %! assert (te{1}, [1 5; 7, 10])
1127 %! assert (te{1}, [1 5; 7, 10]) 1113 %! assert (m{1},'ShoRt Test')
1128 %! assert (m{1},'ShoRt Test') 1114 %! assert (size(t),[1,1])
1129 %! assert (size(t),[1,1]) 1115 %! assert (t{1}{1},'ShoRt')
1130 %! assert (t{1}{1},'ShoRt') 1116 %! assert (t{1}{2},'Test')
1131 %! assert (t{1}{2},'Test') 1117 %! assert (size(nm), [1,1])
1132 %! assert (size(nm), [1,1]) 1118 %! assert (!isempty(fieldnames(nm)))
1133 %! assert (!isempty(fieldnames(nm))) 1119 %! assert (sort(fieldnames(nm)),{'word1';'word2'})
1134 %! assert (sort(fieldnames(nm)),{'word1';'word2'}) 1120 %! assert (nm.word1,'ShoRt')
1135 %! assert (nm.word1,'ShoRt') 1121 %! assert (nm.word2,'Test')
1136 %! assert (nm.word2,'Test') 1122
1137 %! endif 1123 %!testif HAVE_PCRE
1138
1139 %!test
1140 %! ## This test is expected to fail if PCRE is not installed 1124 %! ## This test is expected to fail if PCRE is not installed
1141 %! if (!isempty(findstr(octave_config_info ("DEFS"),"HAVE_PCRE"))) 1125 %! [nm, m, te, e, s, t] = regexpi('ShoRt Test String','(?<word1>\w*t)\s*(?<word2>\w*t)', 'names', 'match', 'tokenExtents', 'end', 'start', 'tokens');
1142 %! [nm, m, te, e, s, t] = regexpi('ShoRt Test String','(?<word1>\w*t)\s*(?<word2>\w*t)', 'names', 'match', 'tokenExtents', 'end', 'start', 'tokens'); 1126 %! assert (s,1)
1143 %! assert (s,1) 1127 %! assert (e,10)
1144 %! assert (e,10) 1128 %! assert (size(te), [1,1])
1145 %! assert (size(te), [1,1]) 1129 %! assert (te{1}, [1 5; 7, 10])
1146 %! assert (te{1}, [1 5; 7, 10]) 1130 %! assert (m{1},'ShoRt Test')
1147 %! assert (m{1},'ShoRt Test') 1131 %! assert (size(t),[1,1])
1148 %! assert (size(t),[1,1]) 1132 %! assert (t{1}{1},'ShoRt')
1149 %! assert (t{1}{1},'ShoRt') 1133 %! assert (t{1}{2},'Test')
1150 %! assert (t{1}{2},'Test') 1134 %! assert (size(nm), [1,1])
1151 %! assert (size(nm), [1,1]) 1135 %! assert (!isempty(fieldnames(nm)))
1152 %! assert (!isempty(fieldnames(nm))) 1136 %! assert (sort(fieldnames(nm)),{'word1';'word2'})
1153 %! assert (sort(fieldnames(nm)),{'word1';'word2'}) 1137 %! assert (nm.word1,'ShoRt')
1154 %! assert (nm.word1,'ShoRt') 1138 %! assert (nm.word2,'Test')
1155 %! assert (nm.word2,'Test')
1156 %! endif
1157 1139
1158 %!assert(regexpi("abc\nabc",'.'),[1:7]) 1140 %!assert(regexpi("abc\nabc",'.'),[1:7])
1159 %!assert(regexpi("abc\nabc",'.','dotall'),[1:7]) 1141 %!assert(regexpi("abc\nabc",'.','dotall'),[1:7])
1160 %!test 1142 %!testif HAVE_PCRE
1161 %! if (!isempty(findstr(octave_config_info ("DEFS"),"HAVE_PCRE"))) 1143 %! assert(regexpi("abc\nabc",'(?s).'),[1:7])
1162 %! assert(regexpi("abc\nabc",'(?s).'),[1:7]) 1144 %! assert(regexpi("abc\nabc",'.','dotexceptnewline'),[1,2,3,5,6,7])
1163 %! assert(regexpi("abc\nabc",'.','dotexceptnewline'),[1,2,3,5,6,7]) 1145 %! assert(regexpi("abc\nabc",'(?-s).'),[1,2,3,5,6,7])
1164 %! assert(regexpi("abc\nabc",'(?-s).'),[1,2,3,5,6,7])
1165 %! endif
1166 1146
1167 %!assert(regexpi("caseCaSe",'case'),[1,5]) 1147 %!assert(regexpi("caseCaSe",'case'),[1,5])
1168 %!assert(regexpi("caseCaSe",'case',"matchcase"),1) 1148 %!assert(regexpi("caseCaSe",'case',"matchcase"),1)
1169 %!assert(regexpi("caseCaSe",'case',"ignorecase"),[1,5]) 1149 %!assert(regexpi("caseCaSe",'case',"ignorecase"),[1,5])
1170 %!test 1150 %!testif HAVE_PCRE
1171 %! if (!isempty(findstr(octave_config_info ("DEFS"),"HAVE_PCRE"))) 1151 %! assert(regexpi("caseCaSe",'(?-i)case'),1)
1172 %! assert(regexpi("caseCaSe",'(?-i)case'),1) 1152 %! assert(regexpi("caseCaSe",'(?i)case'),[1,5])
1173 %! assert(regexpi("caseCaSe",'(?i)case'),[1,5])
1174 %! endif
1175 1153
1176 %!assert (regexpi("abc\nabc",'c$'),7) 1154 %!assert (regexpi("abc\nabc",'c$'),7)
1177 %!assert (regexpi("abc\nabc",'c$',"stringanchors"),7) 1155 %!assert (regexpi("abc\nabc",'c$',"stringanchors"),7)
1178 %!test 1156 %!testif HAVE_PCRE
1179 %! if (!isempty(findstr(octave_config_info ("DEFS"),"HAVE_PCRE"))) 1157 %! assert (regexpi("abc\nabc",'(?-m)c$'),7)
1180 %! assert (regexpi("abc\nabc",'(?-m)c$'),7) 1158 %! assert (regexpi("abc\nabc",'c$',"lineanchors"),[3,7])
1181 %! assert (regexpi("abc\nabc",'c$',"lineanchors"),[3,7]) 1159 %! assert (regexpi("abc\nabc",'(?m)c$'),[3,7])
1182 %! assert (regexpi("abc\nabc",'(?m)c$'),[3,7])
1183 %! endif
1184 1160
1185 %!assert (regexpi("this word",'s w'),4) 1161 %!assert (regexpi("this word",'s w'),4)
1186 %!assert (regexpi("this word",'s w','literalspacing'),4) 1162 %!assert (regexpi("this word",'s w','literalspacing'),4)
1187 %!test 1163 %!testif HAVE_PCRE
1188 %! if (!isempty(findstr(octave_config_info ("DEFS"),"HAVE_PCRE"))) 1164 %! assert (regexpi("this word",'(?-x)s w','literalspacing'),4)
1189 %! assert (regexpi("this word",'(?-x)s w','literalspacing'),4) 1165 %! assert (regexpi("this word",'s w','freespacing'),zeros(1,0))
1190 %! assert (regexpi("this word",'s w','freespacing'),zeros(1,0)) 1166 %! assert (regexpi("this word",'(?x)s w'),zeros(1,0))
1191 %! assert (regexpi("this word",'(?x)s w'),zeros(1,0))
1192 %! endif
1193 1167
1194 %!error regexpi('string', 'tri', 'BadArg'); 1168 %!error regexpi('string', 'tri', 'BadArg');
1195 %!error regexpi('string'); 1169 %!error regexpi('string');
1196 1170
1197 %!assert(regexpi({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'},'-'),{6;[1,5,9];zeros(1,0)}) 1171 %!assert(regexpi({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'},'-'),{6;[1,5,9];zeros(1,0)})
1530 %!test # Check that 'tokenize' is ignored 1504 %!test # Check that 'tokenize' is ignored
1531 %! xml = '<!-- This is some XML --> <tag v="hello">some stuff<!-- sample tag--></tag>'; 1505 %! xml = '<!-- This is some XML --> <tag v="hello">some stuff<!-- sample tag--></tag>';
1532 %! t = regexprep(xml,'<[!?][^>]*>','','tokenize'); 1506 %! t = regexprep(xml,'<[!?][^>]*>','','tokenize');
1533 %! assert(t,' <tag v="hello">some stuff</tag>') 1507 %! assert(t,' <tag v="hello">some stuff</tag>')
1534 1508
1535 %!test # Capture replacement 1509 %!testif HAVE_PCRE # Capture replacement
1536 %! if (!isempty(findstr(octave_config_info ("DEFS"),"HAVE_PCRE"))) 1510 %! data = "Bob Smith\nDavid Hollerith\nSam Jenkins";
1537 %! data = "Bob Smith\nDavid Hollerith\nSam Jenkins"; 1511 %! result = "Smith, Bob\nHollerith, David\nJenkins, Sam";
1538 %! result = "Smith, Bob\nHollerith, David\nJenkins, Sam"; 1512 %! t = regexprep(data,'(?m)^(\w+)\s+(\w+)$','$2, $1');
1539 %! t = regexprep(data,'(?m)^(\w+)\s+(\w+)$','$2, $1'); 1513 %! assert(t,result)
1540 %! assert(t,result)
1541 %! end
1542 1514
1543 # Return the original if no match 1515 # Return the original if no match
1544 %!assert(regexprep('hello','world','earth'),'hello') 1516 %!assert(regexprep('hello','world','earth'),'hello')
1545 1517
1546 ## Test a general replacement 1518 ## Test a general replacement