1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117 package org.exolab.jms.net.uri;
118
119
120
121 import java.io.IOException;
122
123 import java.io.Serializable;
124
125
126
127
128
129 /***
130
131 * A class to represent a Uniform Resource Identifier (URI). This class
132
133 * is designed to handle the parsing of URIs and provide access to
134
135 * the various components (scheme, host, port, userinfo, path, query
136
137 * string and fragment) that may constitute a URI.
138
139 * <p>
140
141 * Parsing of a URI specification is done according to the URI
142
143 * syntax described in RFC 2396
144
145 * <http://www.ietf.org/rfc/rfc2396.txt?number=2396>. Every URI consists
146
147 * of a scheme, followed by a colon (':'), followed by a scheme-specific
148
149 * part. For URIs that follow the "generic URI" syntax, the scheme-
150
151 * specific part begins with two slashes ("//") and may be followed
152
153 * by an authority segment (comprised of user information, host, and
154
155 * port), path segment, query segment and fragment. Note that RFC 2396
156
157 * no longer specifies the use of the parameters segment and excludes
158
159 * the "user:password" syntax as part of the authority segment. If
160
161 * "user:password" appears in a URI, the entire user/password string
162
163 * is stored as userinfo.
164
165 * <p>
166
167 * For URIs that do not follow the "generic URI" syntax (e.g. mailto),
168
169 * the entire scheme-specific part is treated as the "path" portion
170
171 * of the URI.
172
173 * <p>
174
175 * Note that, unlike the java.net.URL class, this class does not provide
176
177 * any built-in network access functionality nor does it provide any
178
179 * scheme-specific functionality (for example, it does not know a
180
181 * default port for a specific scheme). Rather, it only knows the
182
183 * grammar and basic set of operations that can be applied to a URI.
184
185 *
186
187 * @version $Id: URI.java,v 1.1 2004/11/26 01:51:06 tanderson Exp $
188
189 *
190
191 */
192
193 public final class URI
194
195 {
196
197
198
199
200
201 /***
202
203 * MalformedURIExceptions are thrown in the process of building a URI
204
205 * or setting fields on a URI when an operation would result in an
206
207 * invalid URI specification.
208
209 */
210
211 public static class MalformedURIException
212
213 extends IOException
214
215 {
216
217
218
219
220
221 /***
222
223 * Constructs a <code>MalformedURIException</code> with no specified
224
225 * detail message.
226
227 */
228
229 public MalformedURIException()
230
231 {
232
233 super();
234
235 }
236
237
238
239
240
241 /***
242
243 * Constructs a <code>MalformedURIException</code> with the
244
245 * specified detail message.
246
247 *
248
249 * @param message the detail message.
250
251 */
252
253 public MalformedURIException( String message )
254
255 {
256
257 super( message );
258
259 }
260
261
262
263
264
265 }
266
267
268
269
270
271 /***
272
273 * reserved characters
274
275 */
276
277 private static final String RESERVED_CHARACTERS = ";/?:@&=+$,";
278
279
280
281
282
283 /***
284
285 * URI punctuation mark characters - these, combined with
286
287 * alphanumerics, constitute the "unreserved" characters
288
289 */
290
291 private static final String MARK_CHARACTERS = "-_.!~*'() ";
292
293
294
295
296
297 /***
298
299 * scheme can be composed of alphanumerics and these characters
300
301 */
302
303 private static final String SCHEME_CHARACTERS = "+-.";
304
305
306
307
308
309 /***
310
311 * userinfo can be composed of unreserved, escaped and these
312
313 * characters
314
315 */
316
317 private static final String USERINFO_CHARACTERS = ";:&=+$,";
318
319
320
321
322
323 /***
324
325 * Stores the scheme (usually the protocol) for this URI.
326
327 */
328
329 private String _scheme = null;
330
331
332
333
334
335 /***
336
337 * If specified, stores the userinfo for this URI; otherwise null
338
339 */
340
341 private String _userinfo = null;
342
343
344
345
346
347 /***
348
349 * If specified, stores the host for this URI; otherwise null
350
351 */
352
353 private String _host = null;
354
355
356
357
358
359 /***
360
361 * If specified, stores the port for this URI; otherwise -1
362
363 */
364
365 private int _port = -1;
366
367
368
369
370
371 /***
372
373 * If specified, stores the path for this URI; otherwise null
374
375 */
376
377 private String _path = null;
378
379
380
381
382
383 /***
384
385 * If specified, stores the query string for this URI; otherwise
386
387 * null.
388
389 */
390
391 private String _queryString = null;
392
393
394
395
396
397 /***
398
399 * If specified, stores the fragment for this URI; otherwise null
400
401 */
402
403 private String _fragment = null;
404
405
406
407
408
409 /***
410
411 * Indicate whether in DEBUG mode
412
413 */
414
415 private static boolean DEBUG = false;
416
417
418
419
420
421 /***
422
423 * Construct a new and uninitialized URI.
424
425 */
426
427 public URI()
428
429 {
430
431 }
432
433
434
435
436
437 /***
438
439 * Construct a new URI from another URI. All fields for this URI are
440
441 * set equal to the fields of the URI passed in.
442
443 *
444
445 * @param other the URI to copy (cannot be null)
446
447 */
448
public URI( URI other )
{
    // Copy constructor: delegate the field-by-field copy.
    this.initialize( other );
}
456
457
458
459
460
461 /***
462
463 * Construct a new URI from a URI specification string. If the
464
465 * specification follows the "generic URI" syntax, (two slashes
466
467 * following the first colon), the specification will be parsed
468
469 * accordingly - setting the scheme, userinfo, host,port, path, query
470
471 * string and fragment fields as necessary. If the specification does
472
473 * not follow the "generic URI" syntax, the specification is parsed
474
475 * into a scheme and scheme-specific part (stored as the path) only.
476
477 *
478
479 * @param uriSpec the URI specification string (cannot be null or empty)
480
481 * @throws MalformedURIException uriSpec violates any syntax rules
482
483 */
484
public URI( String uriSpec )
    throws MalformedURIException
{
    // No base URI is supplied; the cast selects the (URI, String) overload.
    this( (URI) null, uriSpec );
}
494
495
496
497
498
499 /***
500
501 * Construct a new URI from a base URI and a URI specification string.
502
503 * The URI specification string may be a relative URI.
504
505 *
506
507 * @param base the base URI (cannot be null if uriSpec is null or empty)
508
509 * @param uriSpec the URI specification string (cannot be null or empty
510
511 * if base is null)
512
513 * @throws MalformedURIException uriSpec violates any syntax rules
514
515 */
516
public URI( URI base, String uriSpec )
    throws MalformedURIException
{
    // Parsing and resolution against the base are both done by initialize.
    this.initialize( base, uriSpec );
}
526
527
528
529
530
531 /***
532
533 * Construct a new URI that does not follow the generic URI syntax.
534
535 * Only the scheme and scheme-specific part (stored as the path) are
536
537 * initialized.
538
539 *
540
541 * @param scheme the URI scheme (cannot be null or empty)
542
543 * @param schemeSpecificPart the scheme-specific part (cannot be
544
545 * null or empty)
546
547 * @throws MalformedURIException scheme violates any syntax rules
548
549 */
550
551 public URI( String scheme, String schemeSpecificPart )
552
553 throws MalformedURIException
554
555 {
556
557 if ( scheme == null || scheme.trim().length() == 0 )
558
559 throw new MalformedURIException( "Argument scheme is null or an empty string" );
560
561 if ( schemeSpecificPart == null || schemeSpecificPart.trim().length() == 0 )
562
563 throw new MalformedURIException( "Argument schemeSpecificPart is null or an empty string" );
564
565 setScheme( scheme );
566
567 setPath( schemeSpecificPart );
568
569 }
570
571
572
573
574
575 /***
576
577 * Construct a new URI that follows the generic URI syntax from its
578
579 * component parts. Each component is validated for syntax and some
580
581 * basic semantic checks are performed as well. See the individual
582
583 * setter methods for specifics.
584
585 *
586
587 * @param scheme the URI scheme (cannot be null or empty)
588
589 * @param host the hostname or IPv4 address for the URI
590
591 * @param path the URI path - if the path contains '?' or '#',
592
593 * then the query string and/or fragment will be set from the path;
594
595 * however, if the query and fragment are specified both in the path
596
597 * and as separate parameters, an exception is thrown
598
599 * @param queryString the URI query string (cannot be specified
600
601 * if path is null)
602
603 * @param fragment the URI fragment (cannot be specified if path is null)
604
605 * @throws MalformedURIException Any of the parameters violates
606
607 * syntax rules or semantic rules
608
609 */
610
public URI( String scheme, String host, String path,
            String queryString, String fragment )
    throws MalformedURIException
{
    // Delegate to the full constructor with no userinfo and an unspecified port.
    this( scheme, (String) null, host, -1, path, queryString, fragment );
}
622
623
624
625
626
627 /***
628
629 * Construct a new URI that follows the generic URI syntax from its
630
631 * component parts. Each component is validated for syntax and some
632
633 * basic semantic checks are performed as well. See the individual
634
635 * setter methods for specifics.
636
637 *
638
639 * @param scheme the URI scheme (cannot be null or empty)
640
641 * @param userinfo the URI userinfo (cannot be specified if host is null)
642
643 * @param host the hostname or IPv4 address for the URI
644
645 * @param port the URI port (may be -1 for "unspecified"; cannot
646
647 * be specified if host is null)
648
649 * @param path the URI path - if the path contains '?' or '#',
650
651 * then the query string and/or fragment will be set from the path;
652
653 * however, if the query and fragment are specified both in the path
654
655 * and as separate parameters, an exception is thrown
656
657 * @param queryString the URI query string (cannot be specified
658
659 * if path is null)
660
661 * @param fragment the URI fragment (cannot be specified if path is null)
662
663 * @throws MalformedURIException Any of the parameters violates
664
665 * syntax rules or semantic rules
666
667 */
668
public URI( String scheme, String userinfo, String host, int port,
            String path, String queryString, String fragment )
    throws MalformedURIException
{
    if ( scheme == null || scheme.trim().length() == 0 )
        throw new MalformedURIException( "Argument scheme is null or an empty string" );

    if ( host == null ) {
        // userinfo and port only make sense together with a host
        if ( userinfo != null )
            throw new MalformedURIException( "Argument userInfo must be null if host is null" );
        if ( port != -1 )
            throw new MalformedURIException( "Argument port must be -1 if host is null" );
    } else if ( host.trim().length() == 0 )
        throw new IllegalArgumentException( "Argument host is an empty string" );

    // A null path is legal (setPath accepts it), so path is only
    // inspected when present. The query string and fragment may be given
    // either embedded in the path or as separate arguments, not both.
    if ( path != null ) {
        if ( path.indexOf( '?' ) != -1 && queryString != null )
            throw new MalformedURIException( "Argument queryString is illegal if path includes query string" );
        if ( path.indexOf( '#' ) != -1 && fragment != null )
            throw new MalformedURIException( "Argument fragment is illegal if path includes fragment identifier" );
    }

    // The host must be set first: setPort and setUserinfo reject
    // non-default values while the host is still null.
    setScheme( scheme );
    setHost( host );
    setPort( port );
    setUserinfo( userinfo );
    setPath( path );
    setQueryString( queryString );
    setFragment( fragment );
}
724
725
726
727
728
729 /***
730
731 * Initialize all fields of this URI from another URI.
732
733 *
734
735 * @param other the URI to copy (cannot be null)
736
737 */
738
739 private void initialize( URI other )
740
741 {
742
743 _scheme = other.getScheme();
744
745 _userinfo = other.getUserinfo();
746
747 _host = other.getHost();
748
749 _port = other.getPort();
750
751 _path = other.getPath();
752
753 _queryString = other.getQueryString();
754
755 _fragment = other.getFragment();
756
757 }
758
759
760
761
762
763 /***
764
765 * Initializes this URI from a base URI and a URI specification string.
766
767 * See RFC 2396 Section 4 and Appendix B for specifications on parsing
768
769 * the URI and Section 5 for specifications on resolving relative URIs
770
771 * and relative paths.
772
773 *
774
775 * @param base the base URI (may be null if uriSpec is an absolute URI)
776
777 * @param uriSpec the URI spec string which may be an absolute or
778
779 * relative URI (can only be null/empty if base is not null)
780
781 * @throws MalformedURIException base is null and uriSpec is not an
782
783 * absolute URI or uriSpec violates syntax rules
784
785 */
786
787 private void initialize( URI base, String uriSpec )
788
789 throws MalformedURIException
790
791 {
792
793 int uriSpecLen;
794
795 int index;
796
797 int startPos;
798
799 char testChar;
800
801
802
803 if ( base == null && ( uriSpec == null || uriSpec.trim().length() == 0) )
804
805 throw new MalformedURIException( "Argument base is null and argument uriSpec is null or an empty string" );
806
807
808
809 if ( uriSpec == null || uriSpec.trim().length() == 0 ) {
810
811 initialize( base );
812
813 return;
814
815 }
816
817
818
819 uriSpec = uriSpec.trim();
820
821 uriSpecLen = uriSpec.length();
822
823 index = 0;
824
825
826
827
828
829 if ( uriSpec.indexOf( ':' ) == -1 ) {
830
831 if ( base == null )
832
833 throw new MalformedURIException( "No scheme found in URI." );
834
835 } else {
836
837 initializeScheme( uriSpec );
838
839 index = _scheme.length() + 1;
840
841 }
842
843
844
845
846
847 if ( ( index + 1 < uriSpecLen ) && ( uriSpec.substring( index ).startsWith( "//" ) ) ) {
848
849 index += 2;
850
851 startPos = index;
852
853
854
855 testChar = '\0';
856
857 while ( index < uriSpecLen ) {
858
859 testChar = uriSpec.charAt( index );
860
861 if ( testChar == '/' || testChar == '?' || testChar == '#' )
862
863 break;
864
865 index++;
866
867 }
868
869
870
871
872
873
874
875 if ( index > startPos )
876
877 initializeAuthority( uriSpec.substring( startPos, index ) );
878
879 else
880
881 _host = "";
882
883 }
884
885
886
887 initializePath( uriSpec.substring( index ) );
888
889
890
891
892
893
894
895
896
897
898
899
900
901 if ( base != null ) {
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919 if ( _path.length() == 0 && _scheme == null && _host == null ) {
920
921 _scheme = base.getScheme();
922
923 _userinfo = base.getUserinfo();
924
925 _host = base.getHost();
926
927 _port = base.getPort();
928
929 _path = base.getPath();
930
931 if ( _queryString == null )
932
933 _queryString = base.getQueryString();
934
935 return;
936
937 }
938
939
940
941
942
943
944
945 if ( _scheme == null )
946
947 _scheme = base.getScheme();
948
949 else
950
951 return;
952
953
954
955
956
957
958
959 if ( _host == null ) {
960
961 _userinfo = base.getUserinfo();
962
963 _host = base.getHost();
964
965 _port = base.getPort();
966
967 } else
968
969 return;
970
971
972
973
974
975 if ( _path.length() > 0 && _path.startsWith( "/" ) )
976
977 return;
978
979
980
981
982
983
984
985 String tmpPath = new String();
986
987 String basePath = base.getPath();
988
989
990
991
992
993 if ( basePath != null ) {
994
995 int lastSlash = basePath.lastIndexOf( '/' );
996
997 if ( lastSlash != -1 )
998
999 tmpPath = basePath.substring( 0, lastSlash + 1 );
1000
1001 }
1002
1003
1004
1005
1006
1007 tmpPath = tmpPath.concat( tmpPath );
1008
1009
1010
1011 index = -1;
1012
1013 while ( ( index = tmpPath.indexOf( "/./" ) ) != -1 )
1014
1015 tmpPath = tmpPath.substring( 0, index + 1 ).concat( tmpPath.substring( index + 3 ) );
1016
1017
1018
1019
1020
1021 if ( tmpPath.endsWith("/.") )
1022
1023 tmpPath = tmpPath.substring( 0, tmpPath.length() - 1 );
1024
1025
1026
1027
1028
1029
1030
1031 index = -1;
1032
1033
1034
1035 int segIndex = -1;
1036
1037 String tempString = null;
1038
1039
1040
1041 while ( ( index = tmpPath.indexOf( "/../" ) ) > 0 ) {
1042
1043 tempString = tmpPath.substring( 0, tmpPath.indexOf( "/../" ) );
1044
1045 segIndex = tempString.lastIndexOf( '/' );
1046
1047 if ( segIndex != -1 )
1048
1049 if ( !tempString.substring( segIndex++ ).equals( ".." ) )
1050
1051 tmpPath = tmpPath.substring( 0, segIndex ).concat( tmpPath.substring( index + 4 ) );
1052
1053 }
1054
1055
1056
1057
1058
1059
1060
1061 if ( tmpPath.endsWith( "/.." ) ) {
1062
1063 tempString = tmpPath.substring( 0, tmpPath.length() - 3 );
1064
1065 segIndex = tempString.lastIndexOf( '/' );
1066
1067 if ( segIndex != -1 )
1068
1069 tmpPath = tmpPath.substring( 0, segIndex + 1 );
1070
1071 }
1072
1073 _path = tmpPath;
1074
1075 }
1076
1077 }
1078
1079
1080
1081
1082
1083 /***
1084
1085 * Initialize the scheme for this URI from a URI string spec.
1086
1087 *
1088
1089 * @param uriSpec the URI specification (cannot be null)
1090
1091 * @throws MalformedURIException URI does not have a conformant scheme
1092
1093 */
1094
1095 private void initializeScheme( String uriSpec )
1096
1097 throws MalformedURIException
1098
1099 {
1100
1101 int uriSpecLen = uriSpec.length();
1102
1103 int index = 0;
1104
1105 String scheme = null;
1106
1107 char testChar = '\0';
1108
1109
1110
1111 while ( index < uriSpecLen ) {
1112
1113 testChar = uriSpec.charAt( index );
1114
1115 if ( testChar == ':' || testChar == '/' || testChar == '?' || testChar == '#' )
1116
1117 break;
1118
1119 index++;
1120
1121 }
1122
1123 scheme = uriSpec.substring( 0, index );
1124
1125 if ( scheme.length() == 0 )
1126
1127 throw new MalformedURIException( "No scheme found in URI." );
1128
1129 else
1130
1131 setScheme( scheme );
1132
1133 }
1134
1135
1136
1137
1138
1139 /***
1140
1141 * Initialize the authority (userinfo, host and port) for this
1142
1143 * URI from a URI string spec.
1144
1145 *
1146
1147 * @param uriSpec the URI specification (cannot be null)
1148
1149 * @throws MalformedURIException uriSpec violates syntax rules
1150
1151 */
1152
1153 private void initializeAuthority( String uriSpec )
1154
1155 throws MalformedURIException
1156
1157 {
1158
1159 int index = 0;
1160
1161 int start = 0;
1162
1163 int end = uriSpec.length();
1164
1165 char testChar = '\0';
1166
1167 String userinfo = null;
1168
1169
1170
1171
1172
1173 if ( uriSpec.indexOf( '@', start ) != -1 ) {
1174
1175 while ( index < end ) {
1176
1177 testChar = uriSpec.charAt( index );
1178
1179 if ( testChar == '@' )
1180
1181 break;
1182
1183 index++;
1184
1185 }
1186
1187 userinfo = uriSpec.substring( start, index );
1188
1189 index++;
1190
1191 }
1192
1193
1194
1195
1196
1197 String host = null;
1198
1199
1200
1201 start = index;
1202
1203 while ( index < end ) {
1204
1205 testChar = uriSpec.charAt( index );
1206
1207 if ( testChar == ':' )
1208
1209 break;
1210
1211 index++;
1212
1213 }
1214
1215 host = uriSpec.substring( start, index );
1216
1217
1218
1219 int port = -1;
1220
1221
1222
1223 if ( host.length() > 0 ) {
1224
1225
1226
1227 if ( testChar == ':' ) {
1228
1229 index++;
1230
1231 start = index;
1232
1233 while ( index < end )
1234
1235 index++;
1236
1237
1238
1239 String portStr = uriSpec.substring( start, index );
1240
1241 if ( portStr.length() > 0 ) {
1242
1243 for ( int i = 0 ; i < portStr.length() ; i++ )
1244
1245 if ( !isDigit( portStr.charAt( i ) ) )
1246
1247 throw new MalformedURIException( portStr + " is invalid. Port should only contain digits!" );
1248
1249 try {
1250
1251 port = Integer.parseInt( portStr );
1252
1253 } catch ( NumberFormatException nfe ) {
1254
1255
1256
1257 }
1258
1259 }
1260
1261 }
1262
1263 }
1264
1265
1266
1267 setHost( host );
1268
1269 setPort( port );
1270
1271 setUserinfo( userinfo );
1272
1273 }
1274
1275
1276
1277
1278
1279 /***
1280
1281 * Initialize the path for this URI from a URI string spec.
1282
1283 *
1284
1285 * @param uriSpec the URI specification (cannot be null)
1286
1287 * @throws MalformedURIException uriSpec violates syntax rules
1288
1289 */
1290
1291 private void initializePath( String uriSpec )
1292
1293 throws MalformedURIException
1294
1295 {
1296
1297 if ( uriSpec == null )
1298
1299 throw new MalformedURIException( "Argument uriSpec is null" );
1300
1301
1302
1303 int index = 0;
1304
1305 int start = 0;
1306
1307 int end = uriSpec.length();
1308
1309 char testChar = '\0';
1310
1311
1312
1313
1314
1315 while ( index < end ) {
1316
1317 testChar = uriSpec.charAt( index );
1318
1319 if ( testChar == '?' || testChar == '#' )
1320
1321 break;
1322
1323
1324
1325 if ( testChar == '%' ) {
1326
1327 if ( index + 2 >= end || ! isHex( uriSpec.charAt( index + 1 ) ) ||
1328
1329 ! isHex( uriSpec.charAt( index + 2 ) ) )
1330
1331 throw new MalformedURIException( "Path contains invalid escape sequence!" );
1332
1333 } else if ( ! isReservedCharacter( testChar ) &&
1334
1335 ! isUnreservedCharacter( testChar ) ) {
1336
1337 if ( '//' != testChar )
1338
1339 throw new MalformedURIException( "Path contains invalid character: " + testChar );
1340
1341 }
1342
1343 index++;
1344
1345 }
1346
1347 _path = uriSpec.substring( start, index );
1348
1349
1350
1351
1352
1353 if ( testChar == '?' ) {
1354
1355 index++;
1356
1357 start = index;
1358
1359 while ( index < end ) {
1360
1361 testChar = uriSpec.charAt( index );
1362
1363 if ( testChar == '#' )
1364
1365 break;
1366
1367 if ( testChar == '%' ) {
1368
1369 if ( index + 2 >= end || ! isHex( uriSpec.charAt( index + 1 ) ) ||
1370
1371 ! isHex( uriSpec.charAt( index + 2 ) ) )
1372
1373 throw new MalformedURIException( "Query string contains invalid escape sequence!" );
1374
1375 } else if ( ! isReservedCharacter( testChar ) &&
1376
1377 ! isUnreservedCharacter( testChar ) )
1378
1379 throw new MalformedURIException( "Query string contains invalid character:" + testChar );
1380
1381 index++;
1382
1383 }
1384
1385 _queryString = uriSpec.substring( start, index );
1386
1387 }
1388
1389
1390
1391
1392
1393 if ( testChar == '#' ) {
1394
1395 index++;
1396
1397 start = index;
1398
1399 while ( index < end ) {
1400
1401 testChar = uriSpec.charAt( index );
1402
1403 if ( testChar == '%' ) {
1404
1405 if ( index + 2 >= end || ! isHex( uriSpec.charAt( index + 1 ) ) ||
1406
1407 !isHex( uriSpec.charAt( index + 2 ) ) )
1408
1409 throw new MalformedURIException( "Fragment contains invalid escape sequence!" );
1410
1411 } else if ( ! isReservedCharacter( testChar ) &&
1412
1413 ! isUnreservedCharacter( testChar ) )
1414
1415 throw new MalformedURIException( "Fragment contains invalid character:" + testChar );
1416
1417 index++;
1418
1419 }
1420
1421 _fragment = uriSpec.substring( start, index );
1422
1423 }
1424
1425 }
1426
1427
1428
1429
1430
1431 /***
1432
1433 * Get the scheme for this URI.
1434
1435 *
1436
1437 * @return the scheme for this URI
1438
1439 */
1440
1441 public String getScheme()
1442
1443 {
1444
1445 return _scheme;
1446
1447 }
1448
1449
1450
1451
1452
1453 /***
1454
1455 * Get the scheme-specific part for this URI (everything following the
1456
1457 * scheme and the first colon). See RFC 2396 Section 5.2 for spec.
1458
1459 *
1460
1461 * @return the scheme-specific part for this URI
1462
1463 */
1464
1465 public String getSchemeSpecificPart()
1466
1467 {
1468
1469 StringBuffer schemespec = new StringBuffer();
1470
1471 if ( _userinfo != null || _host != null || _port != -1 ) {
1472
1473 schemespec.append( "//" );
1474
1475 if ( _userinfo != null) {
1476
1477 schemespec.append( _userinfo );
1478
1479 schemespec.append( '@' );
1480
1481 }
1482
1483 if ( _host != null )
1484
1485 schemespec.append( _host );
1486
1487 if ( _port != -1 ) {
1488
1489 schemespec.append( ':' );
1490
1491 schemespec.append( _port );
1492
1493 }
1494
1495 }
1496
1497 if ( _path != null )
1498
1499 schemespec.append( _path );
1500
1501 if ( _queryString != null ) {
1502
1503 schemespec.append( '?' );
1504
1505 schemespec.append( _queryString );
1506
1507 }
1508
1509 if ( _fragment != null ) {
1510
1511 schemespec.append( '#' );
1512
1513 schemespec.append( _fragment );
1514
1515 }
1516
1517 return schemespec.toString();
1518
1519 }
1520
1521
1522
1523
1524
1525 /***
1526
1527 * Get the userinfo for this URI.
1528
1529 *
1530
1531 * @return the userinfo for this URI (null if not specified).
1532
1533 */
1534
1535 public String getUserinfo()
1536
1537 {
1538
1539 return _userinfo;
1540
1541 }
1542
1543
1544
1545
1546
1547 /***
1548
1549 * Get the host for this URI.
1550
1551 *
1552
1553 * @return the host for this URI (null if not specified).
1554
1555 */
1556
1557 public String getHost()
1558
1559 {
1560
1561 return _host;
1562
1563 }
1564
1565
1566
1567
1568
1569 /***
1570
1571 * Get the port for this URI.
1572
1573 *
1574
1575 * @return the port for this URI (-1 if not specified).
1576
1577 */
1578
1579 public int getPort()
1580
1581 {
1582
1583 return _port;
1584
1585 }
1586
1587
1588
1589
1590
1591 /***
1592
1593 * Get the path for this URI (optionally with the query string and
1594
1595 * fragment).
1596
1597 *
1598
1599 * @param includeQueryString if true (and query string is not null),
1600
1601 * then a "?" followed by the query string will be appended
1602
1603 * @param includeFragment if true (and fragment is not null),
1604
1605 * then a "#" followed by the fragment will be appended
1606
1607 * @return the path for this URI possibly including the query string and fragment
1608
1609 */
1610
1611 public String getPath( boolean includeQueryString,
1612
1613 boolean includeFragment )
1614
1615 {
1616
1617 StringBuffer pathString = new StringBuffer( _path );
1618
1619 if ( includeQueryString && _queryString != null ) {
1620
1621 pathString.append( '?' );
1622
1623 pathString.append( _queryString );
1624
1625 }
1626
1627 if ( includeFragment && _fragment != null ) {
1628
1629 pathString.append( '#' );
1630
1631 pathString.append( _fragment );
1632
1633 }
1634
1635 return pathString.toString();
1636
1637 }
1638
1639
1640
1641
1642
1643 /***
1644
1645 * Get the path for this URI. Note that the value returned is the path
1646
1647 * only and does not include the query string or fragment.
1648
1649 *
1650
1651 * @return the path for this URI.
1652
1653 */
1654
1655 public String getPath()
1656
1657 {
1658
1659 return _path;
1660
1661 }
1662
1663
1664
1665
1666
1667 /***
1668
1669 * Get the query string for this URI.
1670
1671 *
1672
1673 * @return the query string for this URI. Null is returned if there
1674
1675 * was no "?" in the URI spec, empty string if there was a "?" but no
1676
1677 * query string following it.
1678
1679 */
1680
1681 public String getQueryString()
1682
1683 {
1684
1685 return _queryString;
1686
1687 }
1688
1689
1690
1691
1692
1693 /***
1694
1695 * Get the fragment for this URI.
1696
1697 *
1698
1699 * @return the fragment for this URI. Null is returned if there
1700
1701 * was no "#" in the URI spec, empty string if there was a
1702
1703 * "#" but no fragment following it.
1704
1705 */
1706
1707 public String getFragment()
1708
1709 {
1710
1711 return _fragment;
1712
1713 }
1714
1715
1716
1717
1718
1719 /***
1720
1721 * Set the scheme for this URI. The scheme is converted to lowercase
1722
1723 * before it is set.
1724
1725 *
1726
1727 * @param scheme the scheme for this URI (cannot be null)
1728
1729 * @throws MalformedURIException scheme is not a conformant scheme name
1730
1731 */
1732
1733 public void setScheme( String scheme )
1734
1735 throws MalformedURIException
1736
1737 {
1738
1739 if ( scheme == null )
1740
1741 throw new MalformedURIException( "Argument scheme is null" );
1742
1743 if ( ! isConformantSchemeName( scheme ) )
1744
1745 throw new MalformedURIException( "The scheme is not conformant." );
1746
1747 _scheme = scheme.toLowerCase();
1748
1749 }
1750
1751
1752
1753
1754
1755 /***
1756
1757 * Set the userinfo for this URI. If a non-null value is passed in and
1758
1759 * the host value is null, then an exception is thrown.
1760
1761 *
1762
1763 * @param userinfo the userinfo for this URI
1764
1765 * @throws MalformedURIException userinfo contains invalid characters
1766
1767 */
1768
1769 public void setUserinfo( String userinfo )
1770
1771 throws MalformedURIException
1772
1773 {
1774
1775 if ( userinfo == null )
1776
1777 _userinfo = null;
1778
1779 else {
1780
1781 if ( _host == null)
1782
1783 throw new MalformedURIException( "Userinfo cannot be set when host is null!" );
1784
1785
1786
1787
1788
1789 int index = 0;
1790
1791 int end = userinfo.length();
1792
1793 char testChar = '\0';
1794
1795
1796
1797 while ( index < end ) {
1798
1799 testChar = userinfo.charAt( index );
1800
1801 if ( testChar == '%' ) {
1802
1803 if ( index + 2 >= end || ! isHex( userinfo.charAt( index + 1 ) ) ||
1804
1805 ! isHex( userinfo.charAt( index + 2 ) ) )
1806
1807 throw new MalformedURIException( "Userinfo contains invalid escape sequence!" );
1808
1809 } else if ( ! isUnreservedCharacter( testChar ) && USERINFO_CHARACTERS.indexOf( testChar ) == -1 )
1810
1811 throw new MalformedURIException( "Userinfo contains invalid character:" + testChar );
1812
1813 index++;
1814
1815 }
1816
1817 }
1818
1819 _userinfo = userinfo;
1820
1821 }
1822
1823
1824
1825
1826
1827 /***
1828
1829 * Set the host for this URI. If null is passed in, the userinfo
1830
1831 * field is also set to null and the port is set to -1.
1832
1833 *
1834
1835 * @param host the host for this URI
1836
1837 * @throws MalformedURIException host is not a valid IP address or DNS hostname.
1838
1839 */
1840
1841 public void setHost( String host )
1842
1843 throws MalformedURIException
1844
1845 {
1846
1847 if ( host == null || host.trim().length() == 0 ) {
1848
1849 _host = host;
1850
1851 _userinfo = null;
1852
1853 _port = -1;
1854
1855 } else if ( ! isWellFormedAddress( host ) )
1856
1857 throw new MalformedURIException( "Host is not a well formed address!" );
1858
1859 _host = host;
1860
1861 }
1862
1863
1864
1865
1866
1867 /***
1868
1869 * Set the port for this URI. -1 is used to indicate that the port is
1870
1871 * not specified, otherwise valid port numbers are between 0 and 65535.
1872
1873 * If a valid port number is passed in and the host field is null,
1874
1875 * an exception is thrown.
1876
1877 *
1878
1879 * @param port the port number for this URI
1880
1881 * @throws MalformedURIException port is not -1 and not a valid port number
1882
1883 */
1884
1885 public void setPort( int port )
1886
1887 throws MalformedURIException
1888
1889 {
1890
1891 if ( port >= 0 && port <= 65535 ) {
1892
1893 if ( _host == null )
1894
1895 throw new MalformedURIException( "Port cannot be set when host is null!" );
1896
1897 } else if ( port != -1 )
1898
1899 throw new MalformedURIException( "Invalid port number!" );
1900
1901 _port = port;
1902
1903 }
1904
1905
1906
1907
1908
1909 /***
1910
1911 * Set the path for this URI. If the supplied path is null, then the
1912
1913 * query string and fragment are set to null as well. If the supplied
1914
1915 * path includes a query string and/or fragment, these fields will be
1916
1917 * parsed and set as well. Note that, for URIs following the "generic
1918
1919 * URI" syntax, the path specified should start with a slash.
1920
1921 * For URIs that do not follow the generic URI syntax, this method
1922
1923 * sets the scheme-specific part.
1924
1925 *
1926
1927 * @param path the path for this URI (may be null)
1928
1929 * @throws MalformedURIException path contains invalid characters
1930
1931 */
1932
1933 public void setPath( String path )
1934
1935 throws MalformedURIException
1936
1937 {
1938
1939 if ( path == null ) {
1940
1941 _path = null;
1942
1943 _queryString = null;
1944
1945 _fragment = null;
1946
1947 } else
1948
1949 initializePath( path );
1950
1951 }
1952
1953
1954
1955
1956
1957 /***
1958
1959 * Append to the end of the path of this URI. If the current path does
1960
1961 * not end in a slash and the path to be appended does not begin with
1962
1963 * a slash, a slash will be appended to the current path before the
1964
1965 * new segment is added. Also, if the current path ends in a slash
1966
1967 * and the new segment begins with a slash, the extra slash will be
1968
1969 * removed before the new segment is appended.
1970
1971 *
1972
1973 * @param addToPath the new segment to be added to the current path
1974
1975 * @exception MalformedURIException addToPath contains syntax errors
1976
1977 */
1978
1979 public void appendPath( String addToPath )
1980
1981 throws MalformedURIException
1982
1983 {
1984
1985 if ( addToPath == null || addToPath.trim().length() == 0 )
1986
1987 return;
1988
1989 if ( ! isURIString( addToPath ) )
1990
1991 throw new MalformedURIException( "Path contains invalid character!" );
1992
1993 if ( _path == null || _path.trim().length() == 0 ) {
1994
1995 if ( addToPath.startsWith( "/" ) )
1996
1997 _path = addToPath;
1998
1999 else
2000
2001 _path = "/" + addToPath;
2002
2003 } else if ( _path.endsWith( "/" ) ) {
2004
2005 if ( addToPath.startsWith( "/" ) )
2006
2007 _path = _path.concat( addToPath.substring( 1 ) );
2008
2009 else
2010
2011 _path = _path.concat( addToPath );
2012
2013 } else {
2014
2015 if ( addToPath.startsWith( "/" ) )
2016
2017 _path = _path.concat( addToPath );
2018
2019 else
2020
2021 _path = _path.concat( "/" + addToPath );
2022
2023 }
2024
2025 }
2026
2027
2028
2029
2030
2031 /***
2032
2033 * Set the query string for this URI. A non-null value is valid only
2034
2035 * if this is an URI conforming to the generic URI syntax and
2036
2037 * the path value is not null.
2038
2039 *
2040
2041 * @param queryString the query string for this URI
2042
2043 * @exception MalformedURIException queryString is not null and this
2044
2045 * URI does not conform to the generic URI syntax or if the path is null
2046
2047 */
2048
2049 public void setQueryString( String queryString )
2050
2051 throws MalformedURIException
2052
2053 {
2054
2055 if ( queryString == null )
2056
2057 _queryString = null;
2058
2059 else if ( ! isGenericURI() )
2060
2061 throw new MalformedURIException( "Query string can only be set for a generic URI!" );
2062
2063 else if ( getPath() == null )
2064
2065 throw new MalformedURIException( "Query string cannot be set when path is null!" );
2066
2067 else if ( ! isURIString( queryString ) )
2068
2069 throw new MalformedURIException( "Query string contains invalid character!" );
2070
2071 else
2072
2073 _queryString = queryString;
2074
2075 }
2076
2077
2078
2079
2080
2081 /***
2082
2083 * Set the fragment for this URI. A non-null value is valid only
2084
2085 * if this is a URI conforming to the generic URI syntax and
2086
2087 * the path value is not null.
2088
2089 *
2090
2091 * @param fragment the fragment for this URI
2092
2093 * @exception MalformedURIException fragment is not null and this
2094
2095 * URI does not conform to the generic URI syntax or if the path is null
2096
2097 */
2098
2099 public void setFragment( String fragment )
2100
2101 throws MalformedURIException
2102
2103 {
2104
2105 if ( fragment == null )
2106
2107 _fragment = null;
2108
2109 else if ( ! isGenericURI() )
2110
2111 throw new MalformedURIException( "Fragment can only be set for a generic URI!" );
2112
2113 else if ( getPath() == null )
2114
2115 throw new MalformedURIException( "Fragment cannot be set when path is null!" );
2116
2117 else if ( ! isURIString( fragment ) )
2118
2119 throw new MalformedURIException( "Fragment contains invalid character!" );
2120
2121 else
2122
2123 _fragment = fragment;
2124
2125 }
2126
2127
2128
2129
2130
2131 /***
2132
2133 * Determines if the passed-in Object is equivalent to this URI.
2134
2135 *
2136
2137 * @param test the Object to test for equality.
2138
2139 * @return true if test is a URI with all values equal to this
2140
2141 * URI, false otherwise
2142
2143 */
2144
2145 public boolean equals( Object test )
2146
2147 {
2148
2149 if ( test instanceof URI ) {
2150
2151 URI testURI = (URI) test;
2152
2153 return ( ( ( _scheme == null && testURI._scheme == null ) ||
2154
2155 ( _scheme != null && testURI._scheme != null && _scheme.equals( testURI._scheme) ) ) &&
2156
2157 ( ( _userinfo == null && testURI._userinfo == null ) ||
2158
2159 ( _userinfo != null && testURI._userinfo != null && _userinfo.equals( testURI._userinfo ) ) ) &&
2160
2161 ( ( _host == null && testURI._host == null ) ||
2162
2163 ( _host != null && testURI._host != null && _host.equals( testURI._host ) ) ) &&
2164
2165 _port == testURI._port &&
2166
2167 ( ( _path == null && testURI._path == null ) ||
2168
2169 ( _path != null && testURI._path != null && _path.equals( testURI._path ) ) ) &&
2170
2171 ( ( _queryString == null && testURI._queryString == null ) ||
2172
2173 ( _queryString != null && testURI._queryString != null &&
2174
2175 _queryString.equals( testURI._queryString ) ) ) &&
2176
2177 ( ( _fragment == null && testURI._fragment == null ) ||
2178
2179 ( _fragment != null && testURI._fragment != null && _fragment.equals( testURI._fragment ) ) ) );
2180
2181 }
2182
2183 return false;
2184
2185 }
2186
2187
2188
2189
2190
2191 /***
2192
2193 * Get the URI as a string specification. See RFC 2396 Section 5.2.
2194
2195 *
2196
2197 * @return the URI string specification
2198
2199 */
2200
2201 public String toString()
2202
2203 {
2204
2205 StringBuffer uriSpecString = new StringBuffer();
2206
2207
2208
2209 if ( _scheme != null ) {
2210
2211 uriSpecString.append( _scheme );
2212
2213 uriSpecString.append( ':' );
2214
2215 }
2216
2217 uriSpecString.append( getSchemeSpecificPart() );
2218
2219 return uriSpecString.toString();
2220
2221 }
2222
2223 /***
2224 * Returns the hash code of this URI
2225 *
2226 * @return the hash code of this URI
2227 */
2228 public int hashCode() {
2229 return toString().hashCode();
2230 }
2231
2232 /***
2233
2234 * Get the indicator as to whether this URI uses the "generic URI"
2235
2236 * syntax.
2237
2238 *
2239
2240 * @return true if this URI uses the "generic URI" syntax, false otherwise
2241
2242 */
2243
2244 public boolean isGenericURI()
2245
2246 {
2247
2248
2249
2250
2251
2252 return ( _host != null );
2253
2254 }
2255
2256
2257
2258
2259
2260 /***
2261
2262 * Determine whether a scheme conforms to the rules for a scheme name.
2263
2264 * A scheme is conformant if it starts with an alphanumeric, and
2265
2266 * contains only alphanumerics, '+','-' and '.'.
2267
2268 *
2269
2270 *
2271
2272 * @param scheme The sheme name to check
2273
2274 * @return true if the scheme is conformant, false otherwise
2275
2276 */
2277
2278 public static boolean isConformantSchemeName( String scheme )
2279
2280 {
2281
2282 if ( scheme == null || scheme.trim().length() == 0 )
2283
2284 return false;
2285
2286 if ( ! isAlpha( scheme.charAt( 0 ) ) )
2287
2288 return false;
2289
2290 char testChar;
2291
2292 for ( int i = 1 ; i < scheme.length() ; i++ ) {
2293
2294 testChar = scheme.charAt( i );
2295
2296 if ( ! isAlphanum( testChar ) && SCHEME_CHARACTERS.indexOf( testChar ) == -1 )
2297
2298 return false;
2299
2300 }
2301
2302 return true;
2303
2304 }
2305
2306
2307
2308
2309
2310 /***
2311
2312 * Determine whether a string is syntactically capable of representing
2313
2314 * a valid IPv4 address or the domain name of a network host. A valid
2315
2316 * IPv4 address consists of four decimal digit groups separated by a
2317
2318 * '.'. A hostname consists of domain labels (each of which must
2319
2320 * begin and end with an alphanumeric but may contain '-') separated
2321
2322 * & by a '.'. See RFC 2396 Section 3.2.2.
2323
2324 *
2325
2326 * @param address The address string to check
2327
2328 * @return true if the string is a syntactically valid IPv4 address or hostname
2329
2330 */
2331
2332 public static boolean isWellFormedAddress( String address )
2333
2334 {
2335
2336 char testChar;
2337
2338
2339
2340 if ( address == null )
2341
2342 return false;
2343
2344 address = address.trim();
2345
2346 int addrLength = address.length();
2347
2348
2349
2350 if ( addrLength == 0 || addrLength > 255 )
2351
2352 return false;
2353
2354
2355
2356 if ( address.startsWith( "." ) || address.startsWith( "-" ) )
2357
2358 return false;
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368 int index = address.lastIndexOf( '.' );
2369
2370 if ( address.endsWith( "." ) )
2371
2372 index = address.substring( 0, index ).lastIndexOf( '.' );
2373
2374 if ( index + 1 < addrLength && isDigit( address.charAt( index + 1 ) ) ) {
2375
2376 int numDots = 0;
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386 for ( int i = 0 ; i < addrLength ; i++) {
2387
2388 testChar = address.charAt( i );
2389
2390 if ( testChar == '.' ) {
2391
2392 if ( ! isDigit( address.charAt( i - 1 ) ) ||
2393
2394 ( i + 1 < addrLength && ! isDigit( address.charAt( i + 1 ) ) ) )
2395
2396 return false;
2397
2398
2399
2400 numDots++;
2401
2402 } else if ( ! isDigit( testChar ) )
2403
2404 return false;
2405
2406 }
2407
2408 if ( numDots != 3 )
2409
2410 return false;
2411
2412 } else {
2413
2414
2415
2416
2417
2418 for ( int i = 0 ; i < addrLength ; i++ ) {
2419
2420 testChar = address.charAt( i );
2421
2422 if ( testChar == '.' ) {
2423
2424 if ( ! isAlphanum( address.charAt( i - 1 ) ) )
2425
2426 return false;
2427
2428 if ( i + 1 < addrLength && ! isAlphanum( address.charAt( i + 1 ) ) )
2429
2430 return false;
2431
2432 } else if ( ! isAlphanum( testChar ) && testChar != '-' )
2433
2434 return false;
2435
2436 }
2437
2438 }
2439
2440 return true;
2441
2442 }
2443
2444
2445
2446
2447
2448 /***
2449
2450 * Determine whether a char is a digit.
2451
2452 *
2453
2454 * @param ch the character to check
2455
2456 * @return true if the char is betweeen '0' and '9', false otherwise
2457
2458 */
2459
2460 private static boolean isDigit( char ch )
2461
2462 {
2463
2464 return ch >= '0' && ch <= '9';
2465
2466 }
2467
2468
2469
2470
2471
2472 /***
2473
2474 * Determine whether a character is a hexadecimal character.
2475
2476 *
2477
2478 * @param ch the character to check
2479
2480 * @return true if the char is betweeen '0' and '9', 'a' and 'f'
2481
2482 * or 'A' and 'F', false otherwise
2483
2484 */
2485
2486 private static boolean isHex( char ch )
2487
2488 {
2489
2490 return ( isDigit( ch ) || ( ch >= 'a' && ch <= 'f' ) ||
2491
2492 ( ch >= 'A' && ch <= 'F' ) );
2493
2494 }
2495
2496
2497
2498
2499
2500 /***
2501
2502 * Determine whether a char is an alphabetic character: a-z or A-Z
2503
2504 *
2505
2506 * @param ch the character to check
2507
2508 * @return true if the char is alphabetic, false otherwise
2509
2510 */
2511
2512 private static boolean isAlpha( char ch )
2513
2514 {
2515
2516 return ( ( ch >= 'a' && ch <= 'z' ) ||
2517
2518 ( ch >= 'A' && ch <= 'Z' ) );
2519
2520 }
2521
2522
2523
2524
2525
2526 /***
2527
2528 * Determine whether a char is an alphanumeric: 0-9, a-z or A-Z
2529
2530 *
2531
2532 * @param ch the character to check
2533
2534 * @return true if the char is alphanumeric, false otherwise
2535
2536 */
2537
2538 private static boolean isAlphanum( char ch )
2539
2540 {
2541
2542 return ( isAlpha( ch ) || isDigit( ch ) );
2543
2544 }
2545
2546
2547
2548
2549
2550 /***
2551
2552 * Determine whether a character is a reserved character:
2553
2554 * ';', '/', '?', ':', '@', '&', '=', '+', '$' or ','
2555
2556 *
2557
2558 * @param ch the character to check
2559
2560 * @return true if the string contains any reserved characters
2561
2562 */
2563
2564 private static boolean isReservedCharacter( char ch )
2565
2566 {
2567
2568 return RESERVED_CHARACTERS.indexOf( ch ) != -1;
2569
2570 }
2571
2572
2573
2574
2575
2576 /***
2577
2578 * Determine whether a char is an unreserved character.
2579
2580 *
2581
2582 * @param ch the character to check
2583
2584 * @return true if the char is unreserved, false otherwise
2585
2586 */
2587
2588 private static boolean isUnreservedCharacter( char ch )
2589
2590 {
2591
2592 return ( isAlphanum( ch ) || MARK_CHARACTERS.indexOf( ch ) != -1 );
2593
2594 }
2595
2596
2597
2598
2599
2600 /***
2601
2602 * Determine whether a given string contains only URI characters (also
2603
2604 * called "uric" in RFC 2396). uric consist of all reserved
2605
2606 * characters, unreserved characters and escaped characters.
2607
2608 *
2609
2610 * @param uric URI string
2611
2612 * @return true if the string is comprised of uric, false otherwise
2613
2614 */
2615
2616 private static boolean isURIString( String uric )
2617
2618 {
2619
2620 if ( uric == null )
2621
2622 return false;
2623
2624 int end = uric.length();
2625
2626 char testChar = '\0';
2627
2628 for ( int i = 0 ; i < end ; i++ ) {
2629
2630 testChar = uric.charAt( i );
2631
2632 if ( testChar == '%' ) {
2633
2634 if ( i + 2 >= end || ! isHex( uric.charAt( i + 1 ) ) ||
2635
2636 ! isHex( uric.charAt( i + 2 ) ) )
2637
2638 return false;
2639
2640 else {
2641
2642 i += 2;
2643
2644 continue;
2645
2646 }
2647
2648 }
2649
2650 if ( isReservedCharacter( testChar ) || isUnreservedCharacter( testChar ) )
2651
2652 continue;
2653
2654 else
2655
2656 return false;
2657
2658 }
2659
2660 return true;
2661
2662 }
2663
2664
2665
2666
2667
2668 }
2669