Mercurial > octave
comparison src/DLD-FUNCTIONS/regexp.cc @ 7242:e4398e3903be
[project @ 2007-12-03 23:52:07 by dbateman]
author | dbateman |
---|---|
date | Mon, 03 Dec 2007 23:52:07 +0000 |
parents | 7e248dd1e6d5 |
children | b166043585a8 35d5c824f1eb |
comparison legend: equal | deleted | inserted | replaced
7241:da540e715d05 | 7242:e4398e3903be |
---|---|
925 %! assert (isempty(te{1})) | 925 %! assert (isempty(te{1})) |
926 %! assert (m{1},'short') | 926 %! assert (m{1},'short') |
927 %! ## Matlab gives [1,0] here but that seems wrong. | 927 %! ## Matlab gives [1,0] here but that seems wrong. |
928 %! assert (size(t), [1,1]) | 928 %! assert (size(t), [1,1]) |
929 | 929 |
930 %!test | 930 %!testif HAVE_PCRE |
931 %! ## This test is expected to fail if PCRE is not installed | 931 %! ## This test is expected to fail if PCRE is not installed |
932 %! if (!isempty(findstr(octave_config_info ("DEFS"),"HAVE_PCRE"))) | 932 %! [s, e, te, m, t, nm] = regexp('short test string','(?<word1>\w*t)\s*(?<word2>\w*t)'); |
933 %! [s, e, te, m, t, nm] = regexp('short test string','(?<word1>\w*t)\s*(?<word2>\w*t)'); | 933 %! assert (s,1) |
934 %! assert (s,1) | 934 %! assert (e,10) |
935 %! assert (e,10) | 935 %! assert (size(te), [1,1]) |
936 %! assert (size(te), [1,1]) | 936 %! assert (te{1}, [1 5; 7, 10]) |
937 %! assert (te{1}, [1 5; 7, 10]) | 937 %! assert (m{1},'short test') |
938 %! assert (m{1},'short test') | 938 %! assert (size(t),[1,1]) |
939 %! assert (size(t),[1,1]) | 939 %! assert (t{1}{1},'short') |
940 %! assert (t{1}{1},'short') | 940 %! assert (t{1}{2},'test') |
941 %! assert (t{1}{2},'test') | 941 %! assert (size(nm), [1,1]) |
942 %! assert (size(nm), [1,1]) | 942 %! assert (!isempty(fieldnames(nm))) |
943 %! assert (!isempty(fieldnames(nm))) | 943 %! assert (sort(fieldnames(nm)),{'word1';'word2'}) |
944 %! assert (sort(fieldnames(nm)),{'word1';'word2'}) | 944 %! assert (nm.word1,'short') |
945 %! assert (nm.word1,'short') | 945 %! assert (nm.word2,'test') |
946 %! assert (nm.word2,'test') | 946 |
947 %! endif | 947 %!testif HAVE_PCRE |
948 | |
949 %!test | |
950 %! ## This test is expected to fail if PCRE is not installed | 948 %! ## This test is expected to fail if PCRE is not installed |
951 %! if (!isempty(findstr(octave_config_info ("DEFS"),"HAVE_PCRE"))) | 949 %! [nm, m, te, e, s, t] = regexp('short test string','(?<word1>\w*t)\s*(?<word2>\w*t)', 'names', 'match', 'tokenExtents', 'end', 'start', 'tokens'); |
952 %! [nm, m, te, e, s, t] = regexp('short test string','(?<word1>\w*t)\s*(?<word2>\w*t)', 'names', 'match', 'tokenExtents', 'end', 'start', 'tokens'); | 950 %! assert (s,1) |
953 %! assert (s,1) | 951 %! assert (e,10) |
954 %! assert (e,10) | 952 %! assert (size(te), [1,1]) |
955 %! assert (size(te), [1,1]) | 953 %! assert (te{1}, [1 5; 7, 10]) |
956 %! assert (te{1}, [1 5; 7, 10]) | 954 %! assert (m{1},'short test') |
957 %! assert (m{1},'short test') | 955 %! assert (size(t),[1,1]) |
958 %! assert (size(t),[1,1]) | 956 %! assert (t{1}{1},'short') |
959 %! assert (t{1}{1},'short') | 957 %! assert (t{1}{2},'test') |
960 %! assert (t{1}{2},'test') | 958 %! assert (size(nm), [1,1]) |
961 %! assert (size(nm), [1,1]) | 959 %! assert (!isempty(fieldnames(nm))) |
962 %! assert (!isempty(fieldnames(nm))) | 960 %! assert (sort(fieldnames(nm)),{'word1';'word2'}) |
963 %! assert (sort(fieldnames(nm)),{'word1';'word2'}) | 961 %! assert (nm.word1,'short') |
964 %! assert (nm.word1,'short') | 962 %! assert (nm.word2,'test') |
965 %! assert (nm.word2,'test') | 963 |
966 %! endif | 964 %!testif HAVE_PCRE |
967 | |
968 %!test | |
969 %! ## This test is expected to fail if PCRE is not installed | 965 %! ## This test is expected to fail if PCRE is not installed |
970 %! if (!isempty(findstr(octave_config_info ("DEFS"),"HAVE_PCRE"))) | 966 %! [t, nm] = regexp("John Davis\nRogers, James",'(?<first>\w+)\s+(?<last>\w+)|(?<last>\w+),\s+(?<first>\w+)','tokens','names'); |
971 %! [t, nm] = regexp("John Davis\nRogers, James",'(?<first>\w+)\s+(?<last>\w+)|(?<last>\w+),\s+(?<first>\w+)','tokens','names'); | 967 %! assert (size(t), [1,2]); |
972 %! assert (size(t), [1,2]); | 968 %! assert (t{1}{1},'John'); |
973 %! assert (t{1}{1},'John'); | 969 %! assert (t{1}{2},'Davis'); |
974 %! assert (t{1}{2},'Davis'); | 970 %! assert (t{2}{1},'Rogers'); |
975 %! assert (t{2}{1},'Rogers'); | 971 %! assert (t{2}{2},'James'); |
976 %! assert (t{2}{2},'James'); | 972 %! assert (size(nm), [1,1]); |
977 %! assert (size(nm), [1,1]); | 973 %! assert (nm.first{1},'John'); |
978 %! assert (nm.first{1},'John'); | 974 %! assert (nm.first{2},'James'); |
979 %! assert (nm.first{2},'James'); | 975 %! assert (nm.last{1},'Davis'); |
980 %! assert (nm.last{1},'Davis'); | 976 %! assert (nm.last{2},'Rogers'); |
981 %! assert (nm.last{2},'Rogers'); | |
982 %! endif | |
983 | 977 |
984 %!assert(regexp("abc\nabc",'.'),[1:7]) | 978 %!assert(regexp("abc\nabc",'.'),[1:7]) |
985 %!assert(regexp("abc\nabc",'.','dotall'),[1:7]) | 979 %!assert(regexp("abc\nabc",'.','dotall'),[1:7]) |
986 %!test | 980 %!testif HAVE_PCRE |
987 %! if (!isempty(findstr(octave_config_info ("DEFS"),"HAVE_PCRE"))) | 981 %! assert(regexp("abc\nabc",'(?s).'),[1:7]) |
988 %! assert(regexp("abc\nabc",'(?s).'),[1:7]) | 982 %! assert(regexp("abc\nabc",'.','dotexceptnewline'),[1,2,3,5,6,7]) |
989 %! assert(regexp("abc\nabc",'.','dotexceptnewline'),[1,2,3,5,6,7]) | 983 %! assert(regexp("abc\nabc",'(?-s).'),[1,2,3,5,6,7]) |
990 %! assert(regexp("abc\nabc",'(?-s).'),[1,2,3,5,6,7]) | |
991 %! endif | |
992 | 984 |
993 %!assert(regexp("caseCaSe",'case'),1) | 985 %!assert(regexp("caseCaSe",'case'),1) |
994 %!assert(regexp("caseCaSe",'case',"matchcase"),1) | 986 %!assert(regexp("caseCaSe",'case',"matchcase"),1) |
995 %!assert(regexp("caseCaSe",'case',"ignorecase"),[1,5]) | 987 %!assert(regexp("caseCaSe",'case',"ignorecase"),[1,5]) |
996 %!test | 988 %!testif HAVE_PCRE |
997 %! if (!isempty(findstr(octave_config_info ("DEFS"),"HAVE_PCRE"))) | 989 %! assert(regexp("caseCaSe",'(?-i)case'),1) |
998 %! assert(regexp("caseCaSe",'(?-i)case'),1) | 990 %! assert(regexp("caseCaSe",'(?i)case'),[1,5]) |
999 %! assert(regexp("caseCaSe",'(?i)case'),[1,5]) | |
1000 %! endif | |
1001 | 991 |
1002 %!assert (regexp("abc\nabc",'c$'),7) | 992 %!assert (regexp("abc\nabc",'c$'),7) |
1003 %!assert (regexp("abc\nabc",'c$',"stringanchors"),7) | 993 %!assert (regexp("abc\nabc",'c$',"stringanchors"),7) |
1004 %!test | 994 %!testif HAVE_PCRE |
1005 %! if (!isempty(findstr(octave_config_info ("DEFS"),"HAVE_PCRE"))) | 995 %! assert (regexp("abc\nabc",'(?-m)c$'),7) |
1006 %! assert (regexp("abc\nabc",'(?-m)c$'),7) | 996 %! assert (regexp("abc\nabc",'c$',"lineanchors"),[3,7]) |
1007 %! assert (regexp("abc\nabc",'c$',"lineanchors"),[3,7]) | 997 %! assert (regexp("abc\nabc",'(?m)c$'),[3,7]) |
1008 %! assert (regexp("abc\nabc",'(?m)c$'),[3,7]) | |
1009 %! endif | |
1010 | 998 |
1011 %!assert (regexp("this word",'s w'),4) | 999 %!assert (regexp("this word",'s w'),4) |
1012 %!assert (regexp("this word",'s w','literalspacing'),4) | 1000 %!assert (regexp("this word",'s w','literalspacing'),4) |
1013 %!test | 1001 %!testif HAVE_PCRE |
1014 %! if (!isempty(findstr(octave_config_info ("DEFS"),"HAVE_PCRE"))) | 1002 %! assert (regexp("this word",'(?-x)s w','literalspacing'),4) |
1015 %! assert (regexp("this word",'(?-x)s w','literalspacing'),4) | 1003 %! assert (regexp("this word",'s w','freespacing'),zeros(1,0)) |
1016 %! assert (regexp("this word",'s w','freespacing'),zeros(1,0)) | 1004 %! assert (regexp("this word",'(?x)s w'),zeros(1,0)) |
1017 %! assert (regexp("this word",'(?x)s w'),zeros(1,0)) | |
1018 %! endif | |
1019 | 1005 |
1020 %!error regexp('string', 'tri', 'BadArg'); | 1006 %!error regexp('string', 'tri', 'BadArg'); |
1021 %!error regexp('string'); | 1007 %!error regexp('string'); |
1022 | 1008 |
1023 %!assert(regexp({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'},'-'),{6;[1,5,9];zeros(1,0)}) | 1009 %!assert(regexp({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'},'-'),{6;[1,5,9];zeros(1,0)}) |
1115 %! assert (isempty(te{1})) | 1101 %! assert (isempty(te{1})) |
1116 %! assert (m{1},'ShoRt') | 1102 %! assert (m{1},'ShoRt') |
1117 %! ## Matlab gives [1,0] here but that seems wrong. | 1103 %! ## Matlab gives [1,0] here but that seems wrong. |
1118 %! assert (size(t), [1,1]) | 1104 %! assert (size(t), [1,1]) |
1119 | 1105 |
1120 %!test | 1106 %!testif HAVE_PCRE |
1121 %! ## This test is expected to fail if PCRE is not installed | 1107 %! ## This test is expected to fail if PCRE is not installed |
1122 %! if (!isempty(findstr(octave_config_info ("DEFS"),"HAVE_PCRE"))) | 1108 %! [s, e, te, m, t, nm] = regexpi('ShoRt Test String','(?<word1>\w*t)\s*(?<word2>\w*t)'); |
1123 %! [s, e, te, m, t, nm] = regexpi('ShoRt Test String','(?<word1>\w*t)\s*(?<word2>\w*t)'); | 1109 %! assert (s,1) |
1124 %! assert (s,1) | 1110 %! assert (e,10) |
1125 %! assert (e,10) | 1111 %! assert (size(te), [1,1]) |
1126 %! assert (size(te), [1,1]) | 1112 %! assert (te{1}, [1 5; 7, 10]) |
1127 %! assert (te{1}, [1 5; 7, 10]) | 1113 %! assert (m{1},'ShoRt Test') |
1128 %! assert (m{1},'ShoRt Test') | 1114 %! assert (size(t),[1,1]) |
1129 %! assert (size(t),[1,1]) | 1115 %! assert (t{1}{1},'ShoRt') |
1130 %! assert (t{1}{1},'ShoRt') | 1116 %! assert (t{1}{2},'Test') |
1131 %! assert (t{1}{2},'Test') | 1117 %! assert (size(nm), [1,1]) |
1132 %! assert (size(nm), [1,1]) | 1118 %! assert (!isempty(fieldnames(nm))) |
1133 %! assert (!isempty(fieldnames(nm))) | 1119 %! assert (sort(fieldnames(nm)),{'word1';'word2'}) |
1134 %! assert (sort(fieldnames(nm)),{'word1';'word2'}) | 1120 %! assert (nm.word1,'ShoRt') |
1135 %! assert (nm.word1,'ShoRt') | 1121 %! assert (nm.word2,'Test') |
1136 %! assert (nm.word2,'Test') | 1122 |
1137 %! endif | 1123 %!testif HAVE_PCRE |
1138 | |
1139 %!test | |
1140 %! ## This test is expected to fail if PCRE is not installed | 1124 %! ## This test is expected to fail if PCRE is not installed |
1141 %! if (!isempty(findstr(octave_config_info ("DEFS"),"HAVE_PCRE"))) | 1125 %! [nm, m, te, e, s, t] = regexpi('ShoRt Test String','(?<word1>\w*t)\s*(?<word2>\w*t)', 'names', 'match', 'tokenExtents', 'end', 'start', 'tokens'); |
1142 %! [nm, m, te, e, s, t] = regexpi('ShoRt Test String','(?<word1>\w*t)\s*(?<word2>\w*t)', 'names', 'match', 'tokenExtents', 'end', 'start', 'tokens'); | 1126 %! assert (s,1) |
1143 %! assert (s,1) | 1127 %! assert (e,10) |
1144 %! assert (e,10) | 1128 %! assert (size(te), [1,1]) |
1145 %! assert (size(te), [1,1]) | 1129 %! assert (te{1}, [1 5; 7, 10]) |
1146 %! assert (te{1}, [1 5; 7, 10]) | 1130 %! assert (m{1},'ShoRt Test') |
1147 %! assert (m{1},'ShoRt Test') | 1131 %! assert (size(t),[1,1]) |
1148 %! assert (size(t),[1,1]) | 1132 %! assert (t{1}{1},'ShoRt') |
1149 %! assert (t{1}{1},'ShoRt') | 1133 %! assert (t{1}{2},'Test') |
1150 %! assert (t{1}{2},'Test') | 1134 %! assert (size(nm), [1,1]) |
1151 %! assert (size(nm), [1,1]) | 1135 %! assert (!isempty(fieldnames(nm))) |
1152 %! assert (!isempty(fieldnames(nm))) | 1136 %! assert (sort(fieldnames(nm)),{'word1';'word2'}) |
1153 %! assert (sort(fieldnames(nm)),{'word1';'word2'}) | 1137 %! assert (nm.word1,'ShoRt') |
1154 %! assert (nm.word1,'ShoRt') | 1138 %! assert (nm.word2,'Test') |
1155 %! assert (nm.word2,'Test') | |
1156 %! endif | |
1157 | 1139 |
1158 %!assert(regexpi("abc\nabc",'.'),[1:7]) | 1140 %!assert(regexpi("abc\nabc",'.'),[1:7]) |
1159 %!assert(regexpi("abc\nabc",'.','dotall'),[1:7]) | 1141 %!assert(regexpi("abc\nabc",'.','dotall'),[1:7]) |
1160 %!test | 1142 %!testif HAVE_PCRE |
1161 %! if (!isempty(findstr(octave_config_info ("DEFS"),"HAVE_PCRE"))) | 1143 %! assert(regexpi("abc\nabc",'(?s).'),[1:7]) |
1162 %! assert(regexpi("abc\nabc",'(?s).'),[1:7]) | 1144 %! assert(regexpi("abc\nabc",'.','dotexceptnewline'),[1,2,3,5,6,7]) |
1163 %! assert(regexpi("abc\nabc",'.','dotexceptnewline'),[1,2,3,5,6,7]) | 1145 %! assert(regexpi("abc\nabc",'(?-s).'),[1,2,3,5,6,7]) |
1164 %! assert(regexpi("abc\nabc",'(?-s).'),[1,2,3,5,6,7]) | |
1165 %! endif | |
1166 | 1146 |
1167 %!assert(regexpi("caseCaSe",'case'),[1,5]) | 1147 %!assert(regexpi("caseCaSe",'case'),[1,5]) |
1168 %!assert(regexpi("caseCaSe",'case',"matchcase"),1) | 1148 %!assert(regexpi("caseCaSe",'case',"matchcase"),1) |
1169 %!assert(regexpi("caseCaSe",'case',"ignorecase"),[1,5]) | 1149 %!assert(regexpi("caseCaSe",'case',"ignorecase"),[1,5]) |
1170 %!test | 1150 %!testif HAVE_PCRE |
1171 %! if (!isempty(findstr(octave_config_info ("DEFS"),"HAVE_PCRE"))) | 1151 %! assert(regexpi("caseCaSe",'(?-i)case'),1) |
1172 %! assert(regexpi("caseCaSe",'(?-i)case'),1) | 1152 %! assert(regexpi("caseCaSe",'(?i)case'),[1,5]) |
1173 %! assert(regexpi("caseCaSe",'(?i)case'),[1,5]) | |
1174 %! endif | |
1175 | 1153 |
1176 %!assert (regexpi("abc\nabc",'c$'),7) | 1154 %!assert (regexpi("abc\nabc",'c$'),7) |
1177 %!assert (regexpi("abc\nabc",'c$',"stringanchors"),7) | 1155 %!assert (regexpi("abc\nabc",'c$',"stringanchors"),7) |
1178 %!test | 1156 %!testif HAVE_PCRE |
1179 %! if (!isempty(findstr(octave_config_info ("DEFS"),"HAVE_PCRE"))) | 1157 %! assert (regexpi("abc\nabc",'(?-m)c$'),7) |
1180 %! assert (regexpi("abc\nabc",'(?-m)c$'),7) | 1158 %! assert (regexpi("abc\nabc",'c$',"lineanchors"),[3,7]) |
1181 %! assert (regexpi("abc\nabc",'c$',"lineanchors"),[3,7]) | 1159 %! assert (regexpi("abc\nabc",'(?m)c$'),[3,7]) |
1182 %! assert (regexpi("abc\nabc",'(?m)c$'),[3,7]) | |
1183 %! endif | |
1184 | 1160 |
1185 %!assert (regexpi("this word",'s w'),4) | 1161 %!assert (regexpi("this word",'s w'),4) |
1186 %!assert (regexpi("this word",'s w','literalspacing'),4) | 1162 %!assert (regexpi("this word",'s w','literalspacing'),4) |
1187 %!test | 1163 %!testif HAVE_PCRE |
1188 %! if (!isempty(findstr(octave_config_info ("DEFS"),"HAVE_PCRE"))) | 1164 %! assert (regexpi("this word",'(?-x)s w','literalspacing'),4) |
1189 %! assert (regexpi("this word",'(?-x)s w','literalspacing'),4) | 1165 %! assert (regexpi("this word",'s w','freespacing'),zeros(1,0)) |
1190 %! assert (regexpi("this word",'s w','freespacing'),zeros(1,0)) | 1166 %! assert (regexpi("this word",'(?x)s w'),zeros(1,0)) |
1191 %! assert (regexpi("this word",'(?x)s w'),zeros(1,0)) | |
1192 %! endif | |
1193 | 1167 |
1194 %!error regexpi('string', 'tri', 'BadArg'); | 1168 %!error regexpi('string', 'tri', 'BadArg'); |
1195 %!error regexpi('string'); | 1169 %!error regexpi('string'); |
1196 | 1170 |
1197 %!assert(regexpi({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'},'-'),{6;[1,5,9];zeros(1,0)}) | 1171 %!assert(regexpi({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'},'-'),{6;[1,5,9];zeros(1,0)}) |
1530 %!test # Check that 'tokenize' is ignored | 1504 %!test # Check that 'tokenize' is ignored |
1531 %! xml = '<!-- This is some XML --> <tag v="hello">some stuff<!-- sample tag--></tag>'; | 1505 %! xml = '<!-- This is some XML --> <tag v="hello">some stuff<!-- sample tag--></tag>'; |
1532 %! t = regexprep(xml,'<[!?][^>]*>','','tokenize'); | 1506 %! t = regexprep(xml,'<[!?][^>]*>','','tokenize'); |
1533 %! assert(t,' <tag v="hello">some stuff</tag>') | 1507 %! assert(t,' <tag v="hello">some stuff</tag>') |
1534 | 1508 |
1535 %!test # Capture replacement | 1509 %!testif HAVE_PCRE # Capture replacement |
1536 %! if (!isempty(findstr(octave_config_info ("DEFS"),"HAVE_PCRE"))) | 1510 %! data = "Bob Smith\nDavid Hollerith\nSam Jenkins"; |
1537 %! data = "Bob Smith\nDavid Hollerith\nSam Jenkins"; | 1511 %! result = "Smith, Bob\nHollerith, David\nJenkins, Sam"; |
1538 %! result = "Smith, Bob\nHollerith, David\nJenkins, Sam"; | 1512 %! t = regexprep(data,'(?m)^(\w+)\s+(\w+)$','$2, $1'); |
1539 %! t = regexprep(data,'(?m)^(\w+)\s+(\w+)$','$2, $1'); | 1513 %! assert(t,result) |
1540 %! assert(t,result) | |
1541 %! end | |
1542 | 1514 |
1543 # Return the original if no match | 1515 # Return the original if no match |
1544 %!assert(regexprep('hello','world','earth'),'hello') | 1516 %!assert(regexprep('hello','world','earth'),'hello') |
1545 | 1517 |
1546 ## Test a general replacement | 1518 ## Test a general replacement |