HDFS-9932: added uriparser2 library to HDFS-8707. Contributed by Bob Hansen.

This commit is contained in:
Bob Hansen 2016-03-14 11:32:23 -04:00 committed by James Clampffer
parent 69ee2e6a7e
commit f25bff50bf
35 changed files with 8759 additions and 0 deletions

View File

@ -117,11 +117,13 @@ include_directories( SYSTEM
third_party/gmock-1.7.0
third_party/tr2
third_party/protobuf
third_party/uriparser2
${OPENSSL_INCLUDE_DIR}
)
add_subdirectory(third_party/gmock-1.7.0)
add_subdirectory(third_party/uriparser2)
add_subdirectory(lib)
add_subdirectory(tests)
add_subdirectory(examples)

View File

@ -0,0 +1,26 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# The bundled uriparser sources use always_inline attributes that gcc
# complains about, so attribute warnings are switched off for both the C and
# C++ flag sets used in this directory.
foreach(uriparser2_lang CXX C)
  set(CMAKE_${uriparser2_lang}_FLAGS "${CMAKE_${uriparser2_lang}_FLAGS} -Wno-attributes")
endforeach()

# Compile the uriparser2 wrapper and the subset of uriparser it relies on
# into an object library.
add_library(uriparser2_obj OBJECT
  uriparser2/uriparser2.c
  uriparser2/uriparser/UriParse.c
  uriparser2/uriparser/UriParseBase.c
  uriparser2/uriparser/UriCommon.c
  uriparser2/uriparser/UriIp4Base.c
  uriparser2/uriparser/UriIp4.c
)

# Package those objects as the uriparser2 library target.
add_library(uriparser2 $<TARGET_OBJECTS:uriparser2_obj>)

View File

@ -0,0 +1,4 @@
*.so
*.a
*.o
test-uriparser2

View File

@ -0,0 +1,19 @@
Copyright (c) 2010 Ben Noordhuis
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

View File

@ -0,0 +1,20 @@
# Standalone build of uriparser2: produces liburiparser2.so and
# liburiparser2.a, and optionally builds and runs the test program.
AR =ar
CC =gcc
CFLAGS =-ansi -Wall -O2 -fPIC
OBJS =uriparser2.o uriparser/UriParse.o uriparser/UriParseBase.o uriparser/UriCommon.o uriparser/UriIp4Base.o uriparser/UriIp4.o

# None of these rules creates a file named after the rule itself. Declaring
# them phony keeps a stray file called e.g. "test" or "lib" from silently
# disabling the rule.
.PHONY: all test lib staticlib clean

# Default target: build both the shared and the static library.
all: lib staticlib

# Link the test program against the static library and run it.
test: staticlib
	$(CC) -o test-uriparser2 test-uriparser2.c liburiparser2.a
	./test-uriparser2

# Shared library.
lib: $(OBJS)
	$(CC) -shared -o liburiparser2.so $(OBJS)

# Static library.
staticlib: $(OBJS)
	$(AR) cr liburiparser2.a $(OBJS)

# Remove every build product.
clean:
	rm -rf liburiparser2.so liburiparser2.a test-uriparser2 $(OBJS)

View File

@ -0,0 +1,103 @@
# uriparser2
Your one-stop C and C++ library for URI parsing.
## Why?
There aren't any easy to use libraries right now. The original uriparser[1] works okay but its API is horrendous.
[1] http://uriparser.sourceforge.net/ - uriparser2 uses bits of uriparser but is not affiliated with or endorsed by said library.
## Compiling
Build the dynamic and static library:
make all
## C usage
uriparser2 exposes a straight-forward API to C client code. An excerpt from `uriparser2.h`:
/**
* URI object. After the call to uri_parse() fields will be NULL (0 for the port) if their component was absent in the input string.
*/
typedef struct {
const char *scheme;
const char *user;
const char *pass;
const char *host;
unsigned short port;
const char *path;
const char *query;
const char *fragment;
} URI;
/**
* Parse URI into its components.
*
* @param uri The URI to parse.
* @return URI object. The caller is responsible for freeing this object. NULL is returned on parse error or out-of-memory conditions (in the latter case errno=ENOMEM).
*/
URI *uri_parse(const char *uri);
/**
* Create string representation of URI object.
*
* @param uri URI object.
* @return URI as a string. The caller is responsible for freeing this object. NULL is returned on out-of-memory conditions (errno=ENOMEM).
*/
char *uri_build(const URI *uri);
/**
* Compare two URI objects. Follows the strcmp() contract. The order in which components are compared is as follows: scheme, host, port, path, query, fragment, user, pass.
* NULL components are always smaller than their non-NULL counterparts. That is, a < b if a->scheme == NULL and b->scheme != NULL.
*
* @param a First URI object.
* @param b Second URI object.
* @return -1 if a < b, 0 if a == b, 1 if a > b.
*/
int uri_compare(const URI *a, const URI *b);
Example:
URI *uri = uri_parse("http://github.com/bnoordhuis/uriparser2");
char *s = uri_build(uri);
printf("uri=%s, host=%s, path=%s\n", s, uri->host, uri->path);
free(s);
free(uri);
## C++ usage
An idiomatic RAII class is exposed to C++ client code:
typedef struct URI {
const char *scheme;
const char *user;
const char *pass;
const char *host;
unsigned short port;
const char *path;
const char *query;
const char *fragment;
URI(const char *uri = 0);
~URI();
bool operator<(const URI& uri) const;
bool operator>(const URI& uri) const;
bool operator<=(const URI& uri) const;
bool operator>=(const URI& uri) const;
bool operator==(const URI& uri) const;
bool operator!=(const URI& uri) const;
std::string to_string() const;
} URI;
Example:
URI uri("http://github.com/bnoordhuis/uriparser2");
std::cout << "uri=" << uri << ", host=" << uri.host << ", path=" << uri.path << std::endl;
## License
uriparser2 is MIT-licensed. The bits from the original uriparser - http://uriparser.sourceforge.net/ - are BSD-licensed.

View File

@ -0,0 +1,108 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#undef NDEBUG
#include <assert.h>
#include "uriparser2.h"
/*
 * Debug helper: writes every component of a parsed URI to stdout, one
 * "name=value" line per component, or the text "null" when no URI object
 * is given.
 *
 * Components that were absent from the input are NULL pointers. Passing a
 * NULL pointer to a %s conversion is undefined behaviour, so such components
 * are printed as the literal text "(null)" instead.
 *
 * @param a Parsed URI object, may be NULL.
 */
static void print(URI *a) {
#define PRINT_STR_OR_NULL(s) ((s) ? (s) : "(null)")
	if (!a) {
		printf("null");
		return;
	}
	printf(
		"scheme =%s\n"
		"user =%s\n"
		"pass =%s\n"
		"host =%s\n"
		"port =%d\n"
		"path =%s\n"
		"query =%s\n"
		"fragment=%s\n",
		PRINT_STR_OR_NULL(a->scheme),
		PRINT_STR_OR_NULL(a->user),
		PRINT_STR_OR_NULL(a->pass),
		PRINT_STR_OR_NULL(a->host),
		a->port,
		PRINT_STR_OR_NULL(a->path),
		PRINT_STR_OR_NULL(a->query),
		PRINT_STR_OR_NULL(a->fragment));
#undef PRINT_STR_OR_NULL
}
/*
 * Tracing variant of uri_parse(): parses the given string, dumps the result
 * through print() and hands the parsed object back to the caller unchanged.
 *
 * @param uri The URI string to parse.
 * @return Whatever uri_parse() returned for the input.
 */
static URI *uri_parse2(const char *uri) {
	URI *parsed;

	parsed = uri_parse(uri);
	print(parsed);
	return parsed;
}
//#define uri_parse(s) uri_parse2(s)
/*
 * Parses an http URI with host, path, query and fragment and checks each of
 * those components; the port must be 0 because the input does not name one.
 */
static void simple_test(void) {
	URI *a = uri_parse("http://www.google.com/search?q=uriparser#top");
	assert(a);
	assert(a->port == 0);
	assert(!strcmp(a->scheme, "http"));
	assert(!strcmp(a->host, "www.google.com"));
	assert(!strcmp(a->path, "/search"));
	assert(!strcmp(a->query, "q=uriparser"));
	assert(!strcmp(a->fragment, "top"));
	free(a); /* the caller owns the object returned by uri_parse() */
}
/*
 * Checks that a path made of several segments is reported as one string
 * with its separators intact.
 */
static void multi_segment_path(void) {
	URI *a = uri_parse("http://www.example.com/foo/bar/baz");
	assert(a); /* fail with a clear assertion instead of a NULL dereference */
	assert(!strcmp(a->path, "/foo/bar/baz"));
	free(a); /* the caller owns the object returned by uri_parse() */
}
/*
 * Checks that a file: URI with an empty authority yields no host and the
 * full absolute path.
 */
static void file_path(void) {
	URI *a = uri_parse("file:///foo/bar/baz");
	assert(a); /* fail with a clear assertion instead of a NULL dereference */
	assert(a->host == 0);
	assert(!strcmp(a->path, "/foo/bar/baz"));
	free(a); /* the caller owns the object returned by uri_parse() */
}
/*
 * Checks that an explicit port is parsed into the numeric port field and
 * kept out of the host and path components.
 */
static void port_number(void) {
	URI *a = uri_parse("http://localhost:8080/");
	assert(a); /* fail with a clear assertion instead of a NULL dereference */
	assert(a->port == 8080);
	assert(!strcmp(a->host, "localhost"));
	assert(!strcmp(a->path, "/"));
	free(a); /* the caller owns the object returned by uri_parse() */
}
/*
 * Checks that "user:pass@" user info is split into its user and pass parts.
 */
static void user_info(void) {
	URI *a = uri_parse("http://foo:bar@localhost:8080/");
	assert(a); /* fail with a clear assertion instead of a NULL dereference */
	assert(!strcmp(a->user, "foo"));
	assert(!strcmp(a->pass, "bar"));
	free(a); /* the caller owns the object returned by uri_parse() */
}
/*
 * Checks that user info without a ":pass" part yields a user and a NULL
 * password.
 */
static void user_info_only_user(void) {
	URI *a = uri_parse("http://foo@localhost:8080/");
	assert(a); /* fail with a clear assertion instead of a NULL dereference */
	assert(!strcmp(a->user, "foo"));
	assert(a->pass == 0);
	free(a); /* the caller owns the object returned by uri_parse() */
}
/*
 * Checks that user info with an empty user part (":pass@") yields a NULL
 * user and the given password.
 */
static void user_info_only_pass(void) {
	URI *a = uri_parse("http://:bar@localhost:8080/");
	assert(a); /* fail with a clear assertion instead of a NULL dereference */
	assert(a->user == 0);
	assert(!strcmp(a->pass, "bar"));
	free(a); /* the caller owns the object returned by uri_parse() */
}
/*
 * Round-trip check: parsing a URI that uses every component and rebuilding
 * it with uri_build() must reproduce the original string exactly.
 */
static void recomposed_equals_original_url(void) {
	const char *uri = "http://foo:bar@example.com:8080/path/to/resource?q=hello+world&ln=en#top";
	URI *a = uri_parse(uri);
	char *uri2;

	assert(a); /* parse failure would otherwise surface as a crash */
	uri2 = uri_build(a);
	assert(uri2); /* uri_build() returns NULL when out of memory */
	assert(!strcmp(uri, uri2));

	/* The caller owns both the built string and the parsed object. */
	free(uri2);
	free(a);
}
/*
 * Checks that two URI objects parsed from the same string compare equal
 * under uri_compare().
 */
static void equal(void) {
	const char *uri = "http://www.google.com/search?q=uriparser2&ln=en#top";
	URI *a = uri_parse(uri);
	URI *b = uri_parse(uri);

	/* Make a parse failure show up here instead of inside uri_compare(). */
	assert(a);
	assert(b);
	assert(0 == uri_compare(a, b));

	/* The caller owns both parsed objects. */
	free(a);
	free(b);
}
int main(void) {
simple_test();
multi_segment_path();
file_path();
port_number();
user_info();
user_info_only_user();
user_info_only_pass();
recomposed_equals_original_url();
equal();
puts("All tests OK.");
return 0;
}

View File

@ -0,0 +1,2 @@
Weijia Song <songweijia@gmail.com>
Sebastian Pipping <webmaster@hartwork.org>

View File

@ -0,0 +1,36 @@
uriparser - RFC 3986 URI parsing library
Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
Copyright (C) 2007, Sebastian Pipping <webmaster@hartwork.org>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above
copyright notice, this list of conditions and the following
disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials
provided with the distribution.
* Neither the name of the <ORGANIZATION> nor the names of its
contributors may be used to endorse or promote products
derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -0,0 +1,752 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
* Copyright (C) 2007, Sebastian Pipping <webmaster@hartwork.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the <ORGANIZATION> nor the names of its
* contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* @file Uri.h
* Holds the RFC 3986 %URI parser interface.
* NOTE: This header includes itself twice.
*/
#if (defined(URI_PASS_ANSI) && !defined(URI_H_ANSI)) \
|| (defined(URI_PASS_UNICODE) && !defined(URI_H_UNICODE)) \
|| (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* What encodings are enabled? */
#include "UriDefsConfig.h"
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
# define URI_PASS_ANSI 1
# include "Uri.h"
# undef URI_PASS_ANSI
# define URI_PASS_UNICODE 1
# include "Uri.h"
# undef URI_PASS_UNICODE
/* Only one pass for each encoding */
#elif (defined(URI_PASS_ANSI) && !defined(URI_H_ANSI) \
&& defined(URI_ENABLE_ANSI)) || (defined(URI_PASS_UNICODE) \
&& !defined(URI_H_UNICODE) && defined(URI_ENABLE_UNICODE))
# ifdef URI_PASS_ANSI
# define URI_H_ANSI 1
# include "UriDefsAnsi.h"
# else
# define URI_H_UNICODE 1
# include "UriDefsUnicode.h"
# endif
#ifdef __cplusplus
extern "C" {
#endif
#ifndef URI_DOXYGEN
# include "UriBase.h"
#endif
/**
* Specifies a range of characters within a string.
* The range includes all characters from <c>first</c>
* to one before <c>afterLast</c>, i.e. the half-open range
* [first, afterLast). So if both are non-NULL the difference
* <c>afterLast - first</c> is the length of the text range.
*
* @see UriUriA
* @see UriPathSegmentA
* @see UriHostDataA
* @since 0.3.0
*/
typedef struct URI_TYPE(TextRangeStruct) {
const URI_CHAR * first; /**< Pointer to first character */
const URI_CHAR * afterLast; /**< Pointer to the character just past the last one in the range */
} URI_TYPE(TextRange); /**< @copydoc UriTextRangeStructA */
/**
* Represents a path segment within a %URI path.
* More precisely it is a node in a singly linked
* list of path segments.
*
* @see UriUriA
* @since 0.3.0
*/
typedef struct URI_TYPE(PathSegmentStruct) {
URI_TYPE(TextRange) text; /**< Path segment name */
struct URI_TYPE(PathSegmentStruct) * next; /**< Pointer to the next path segment in the list, NULL for the last segment */
void * reserved; /**< Reserved to the parser */
} URI_TYPE(PathSegment); /**< @copydoc UriPathSegmentStructA */
/**
* Holds structured host information.
* This is either an IPv4 address, an IPv6 address,
* plain text for IPvFuture, or all zero for
* a registered name.
*
* @see UriUriA
* @since 0.3.0
*/
typedef struct URI_TYPE(HostDataStruct) {
UriIp4 * ip4; /**< IPv4 address */
UriIp6 * ip6; /**< IPv6 address */
URI_TYPE(TextRange) ipFuture; /**< IPvFuture address */
} URI_TYPE(HostData); /**< @copydoc UriHostDataStructA */
/**
* Represents an RFC 3986 %URI.
* Missing components can be {NULL, NULL} ranges.
*
* @see uriParseUriA
* @see uriFreeUriMembersA
* @see UriParserStateA
* @since 0.3.0
*/
typedef struct URI_TYPE(UriStruct) {
URI_TYPE(TextRange) scheme; /**< Scheme (e.g. "http") */
URI_TYPE(TextRange) userInfo; /**< User info (e.g. "user:pass") */
URI_TYPE(TextRange) hostText; /**< Host text (set for all hosts, excluding square brackets) */
URI_TYPE(HostData) hostData; /**< Structured host type specific data */
URI_TYPE(TextRange) portText; /**< Port (e.g. "80") */
URI_TYPE(PathSegment) * pathHead; /**< Head of a linked list of path segments */
URI_TYPE(PathSegment) * pathTail; /**< Tail of the list behind pathHead */
URI_TYPE(TextRange) query; /**< Query without leading "?" */
URI_TYPE(TextRange) fragment; /**< Fragment without leading "#" */
UriBool absolutePath; /**< Absolute path flag, distinguishing "a" from "/a" */
UriBool owner; /**< Memory owner flag */
void * reserved; /**< Reserved to the parser */
} URI_TYPE(Uri); /**< @copydoc UriUriStructA */
/**
* Represents a state of the %URI parser.
* Missing components can be NULL to reflect
* a component's absence.
*
* @see uriFreeUriMembersA
* @since 0.3.0
*/
typedef struct URI_TYPE(ParserStateStruct) {
URI_TYPE(Uri) * uri; /**< Plug in the %URI structure to be filled while parsing here */
int errorCode; /**< Code identifying the error that occurred */
const URI_CHAR * errorPos; /**< Pointer to position in case of a syntax error */
void * reserved; /**< Reserved to the parser */
} URI_TYPE(ParserState); /**< @copydoc UriParserStateStructA */
/**
* Represents a query element.
* More precisely it is a node in a singly linked
* list of query elements (key/value pairs).
*
* @since 0.7.0
*/
typedef struct URI_TYPE(QueryListStruct) {
const URI_CHAR * key; /**< Key of the query element */
const URI_CHAR * value; /**< Value of the query element, can be NULL */
struct URI_TYPE(QueryListStruct) * next; /**< Pointer to the next key/value pair in the list, NULL for the last element */
} URI_TYPE(QueryList); /**< @copydoc UriQueryListStructA */
/**
* Parses a RFC 3986 URI.
*
* @param state <b>INOUT</b>: Parser state with set output %URI, must not be NULL
* @param first <b>IN</b>: Pointer to the first character to parse, must not be NULL
* @param afterLast <b>IN</b>: Pointer to the character after the last to parse, must not be NULL
* @return 0 on success, error code otherwise
*
* @see uriParseUriA
* @see uriToStringA
* @since 0.3.0
*/
int URI_FUNC(ParseUriEx)(URI_TYPE(ParserState) * state,
const URI_CHAR * first, const URI_CHAR * afterLast);
/**
* Parses a RFC 3986 %URI.
*
* @param state <b>INOUT</b>: Parser state with set output %URI, must not be NULL
* @param text <b>IN</b>: Text to parse, must not be NULL
* @return 0 on success, error code otherwise
*
* @see uriParseUriExA
* @see uriToStringA
* @since 0.3.0
*/
int URI_FUNC(ParseUri)(URI_TYPE(ParserState) * state,
const URI_CHAR * text);
/**
* Frees all memory associated with the members
* of the %URI structure. Note that the structure
* itself is not freed, only its members.
*
* @param uri <b>INOUT</b>: %URI structure whose members should be freed
*
* @since 0.3.0
*/
void URI_FUNC(FreeUriMembers)(URI_TYPE(Uri) * uri);
/**
* Percent-encodes all but unreserved characters from the input string and
* writes the encoded version to the output string.
* Be sure to allocate <b>3 times</b> the space of the input buffer for
* the output buffer for <c>normalizeBreaks == URI_FALSE</c> and <b>6 times</b>
* the space for <c>normalizeBreaks == URI_TRUE</c>
* (since e.g. "\x0d" becomes "%0D%0A" in that case)
*
* @param inFirst <b>IN</b>: Pointer to first character of the input text
* @param inAfterLast <b>IN</b>: Pointer after the last character of the input text
* @param out <b>OUT</b>: Encoded text destination
* @param spaceToPlus <b>IN</b>: Whether to convert ' ' to '+' or not
* @param normalizeBreaks <b>IN</b>: Whether to convert CR and LF to CR-LF or not.
* @return Position of terminator in output string
*
* @see uriEscapeA
* @see uriUnescapeInPlaceExA
* @since 0.5.2
*/
URI_CHAR * URI_FUNC(EscapeEx)(const URI_CHAR * inFirst,
const URI_CHAR * inAfterLast, URI_CHAR * out,
UriBool spaceToPlus, UriBool normalizeBreaks);
/**
* Percent-encodes all but unreserved characters from the input string and
* writes the encoded version to the output string.
* Be sure to allocate <b>3 times</b> the space of the input buffer for
* the output buffer for <c>normalizeBreaks == URI_FALSE</c> and <b>6 times</b>
* the space for <c>normalizeBreaks == URI_TRUE</c>
* (since e.g. "\x0d" becomes "%0D%0A" in that case)
*
* @param in <b>IN</b>: Text source
* @param out <b>OUT</b>: Encoded text destination
* @param spaceToPlus <b>IN</b>: Whether to convert ' ' to '+' or not
* @param normalizeBreaks <b>IN</b>: Whether to convert CR and LF to CR-LF or not.
* @return Position of terminator in output string
*
* @see uriEscapeExA
* @see uriUnescapeInPlaceA
* @since 0.5.0
*/
URI_CHAR * URI_FUNC(Escape)(const URI_CHAR * in, URI_CHAR * out,
UriBool spaceToPlus, UriBool normalizeBreaks);
/**
* Unescapes percent-encoded groups in a given string.
* E.g. "%20" will become " ". Unescaping is done in place.
* The return value will point to the new position
* of the terminating zero. Use this value to get the new
* length of the string. NULL is only returned if <c>inout</c>
* is NULL.
*
* @param inout <b>INOUT</b>: Text to unescape/decode
* @param plusToSpace <b>IN</b>: Whether to convert '+' to ' ' or not
* @param breakConversion <b>IN</b>: Line break conversion mode
* @return Pointer to new position of the terminating zero
*
* @see uriUnescapeInPlaceA
* @see uriEscapeExA
* @since 0.5.0
*/
const URI_CHAR * URI_FUNC(UnescapeInPlaceEx)(URI_CHAR * inout,
UriBool plusToSpace, UriBreakConversion breakConversion);
/**
* Unescapes percent-encoded groups in a given string.
* E.g. "%20" will become " ". Unescaping is done in place.
* The return value will point to the new position
* of the terminating zero. Use this value to get the new
* length of the string. NULL is only returned if <c>inout</c>
* is NULL.
*
* NOTE: '+' is not decoded to ' ' and line breaks are not converted.
* Use the more advanced UnescapeInPlaceEx for those features instead.
*
* @param inout <b>INOUT</b>: Text to unescape/decode
* @return Pointer to new position of the terminating zero
*
* @see uriUnescapeInPlaceExA
* @see uriEscapeA
* @since 0.3.0
*/
const URI_CHAR * URI_FUNC(UnescapeInPlace)(URI_CHAR * inout);
/**
* Performs reference resolution as described in
* <a href="http://tools.ietf.org/html/rfc3986#section-5.2.2">section 5.2.2 of RFC 3986</a>.
* NOTE: On success you have to call uriFreeUriMembersA on \p absoluteDest manually later.
*
* @param absoluteDest <b>OUT</b>: Result %URI
* @param relativeSource <b>IN</b>: Reference to resolve
* @param absoluteBase <b>IN</b>: Base %URI to apply
* @return Error code or 0 on success
*
* @see uriRemoveBaseUriA
* @since 0.4.0
*/
int URI_FUNC(AddBaseUri)(URI_TYPE(Uri) * absoluteDest,
const URI_TYPE(Uri) * relativeSource,
const URI_TYPE(Uri) * absoluteBase);
/**
* Tries to make a relative %URI (a reference) from an
* absolute %URI and a given base %URI. This can only work if
* the absolute %URI shares scheme and authority with
* the base %URI. If it does not the result will still be
* an absolute URI (with scheme part if necessary).
* NOTE: On success you have to call uriFreeUriMembersA on
* \p dest manually later.
*
* @param dest <b>OUT</b>: Result %URI
* @param absoluteSource <b>IN</b>: Absolute %URI to make relative
* @param absoluteBase <b>IN</b>: Base %URI
* @param domainRootMode <b>IN</b>: Create %URI with path relative to domain root
* @return Error code or 0 on success
*
* @see uriAddBaseUriA
* @since 0.5.2
*/
int URI_FUNC(RemoveBaseUri)(URI_TYPE(Uri) * dest,
const URI_TYPE(Uri) * absoluteSource,
const URI_TYPE(Uri) * absoluteBase,
UriBool domainRootMode);
/**
* Checks two URIs for equivalence. Comparison is done
* the naive way, without prior normalization.
* NOTE: Two <c>NULL</c> URIs are equal as well.
*
* @param a <b>IN</b>: First %URI
* @param b <b>IN</b>: Second %URI
* @return <c>URI_TRUE</c> when equal, <c>URI_FALSE</c> else
*
* @since 0.4.0
*/
UriBool URI_FUNC(EqualsUri)(const URI_TYPE(Uri) * a, const URI_TYPE(Uri) * b);
/**
* Calculates the number of characters needed to store the
* string representation of the given %URI excluding the
* terminator.
*
* @param uri <b>IN</b>: %URI to measure
* @param charsRequired <b>OUT</b>: Length of the string representation in characters <b>excluding</b> terminator
* @return Error code or 0 on success
*
* @see uriToStringA
* @since 0.5.0
*/
int URI_FUNC(ToStringCharsRequired)(const URI_TYPE(Uri) * uri,
int * charsRequired);
/**
* Converts a %URI structure back to text as described in
* <a href="http://tools.ietf.org/html/rfc3986#section-5.3">section 5.3 of RFC 3986</a>.
*
* @param dest <b>OUT</b>: Output destination
* @param uri <b>IN</b>: %URI to convert
* @param maxChars <b>IN</b>: Maximum number of characters to copy <b>including</b> terminator
* @param charsWritten <b>OUT</b>: Number of characters written, can be lower than maxChars even if the %URI is too long!
* @return Error code or 0 on success
*
* @see uriToStringCharsRequiredA
* @since 0.4.0
*/
int URI_FUNC(ToString)(URI_CHAR * dest, const URI_TYPE(Uri) * uri, int maxChars, int * charsWritten);
/**
* Determines the components of a %URI that are not normalized.
*
* @param uri <b>IN</b>: %URI to check
* @return Normalization job mask
*
* @see uriNormalizeSyntaxA
* @since 0.5.0
*/
unsigned int URI_FUNC(NormalizeSyntaxMaskRequired)(const URI_TYPE(Uri) * uri);
/**
* Normalizes a %URI using a normalization mask.
* The normalization mask decides what components are normalized.
*
* NOTE: If necessary the %URI becomes owner of all memory
* behind the text pointed to. Text is duplicated in that case.
*
* @param uri <b>INOUT</b>: %URI to normalize
* @param mask <b>IN</b>: Normalization mask
* @return Error code or 0 on success
*
* @see uriNormalizeSyntaxA
* @see uriNormalizeSyntaxMaskRequiredA
* @since 0.5.0
*/
int URI_FUNC(NormalizeSyntaxEx)(URI_TYPE(Uri) * uri, unsigned int mask);
/**
* Normalizes all components of a %URI.
*
* NOTE: If necessary the %URI becomes owner of all memory
* behind the text pointed to. Text is duplicated in that case.
*
* @param uri <b>INOUT</b>: %URI to normalize
* @return Error code or 0 on success
*
* @see uriNormalizeSyntaxExA
* @see uriNormalizeSyntaxMaskRequiredA
* @since 0.5.0
*/
int URI_FUNC(NormalizeSyntax)(URI_TYPE(Uri) * uri);
/**
* Converts a Unix filename to a %URI string.
* The destination buffer must be large enough to hold 7 + 3 * len(filename) + 1
* characters in case of an absolute filename or 3 * len(filename) + 1 in case
* of a relative filename.
*
* EXAMPLE
* Input: "/bin/bash"
* Output: "file:///bin/bash"
*
* @param filename <b>IN</b>: Unix filename to convert
* @param uriString <b>OUT</b>: Destination to write %URI string to
* @return Error code or 0 on success
*
* @see uriUriStringToUnixFilenameA
* @see uriWindowsFilenameToUriStringA
* @since 0.5.2
*/
int URI_FUNC(UnixFilenameToUriString)(const URI_CHAR * filename,
URI_CHAR * uriString);
/**
* Converts a Windows filename to a %URI string.
* The destination buffer must be large enough to hold 8 + 3 * len(filename) + 1
* characters in case of an absolute filename or 3 * len(filename) + 1 in case
* of a relative filename.
*
* EXAMPLE
* Input: "E:\\Documents and Settings"
* Output: "file:///E:/Documents%20and%20Settings"
*
* @param filename <b>IN</b>: Windows filename to convert
* @param uriString <b>OUT</b>: Destination to write %URI string to
* @return Error code or 0 on success
*
* @see uriUriStringToWindowsFilenameA
* @see uriUnixFilenameToUriStringA
* @since 0.5.2
*/
int URI_FUNC(WindowsFilenameToUriString)(const URI_CHAR * filename,
URI_CHAR * uriString);
/**
* Extracts a Unix filename from a %URI string.
* The destination buffer must be large enough to hold len(uriString) + 1 - 7
* characters in case of an absolute %URI or len(uriString) + 1 in case
* of a relative %URI.
*
* @param uriString <b>IN</b>: %URI string to convert
* @param filename <b>OUT</b>: Destination to write filename to
* @return Error code or 0 on success
*
* @see uriUnixFilenameToUriStringA
* @see uriUriStringToWindowsFilenameA
* @since 0.5.2
*/
int URI_FUNC(UriStringToUnixFilename)(const URI_CHAR * uriString,
URI_CHAR * filename);
/**
* Extracts a Windows filename from a %URI string.
* The destination buffer must be large enough to hold len(uriString) + 1 - 8
* characters in case of an absolute %URI or len(uriString) + 1 in case
* of a relative %URI.
*
* @param uriString <b>IN</b>: %URI string to convert
* @param filename <b>OUT</b>: Destination to write filename to
* @return Error code or 0 on success
*
* @see uriWindowsFilenameToUriStringA
* @see uriUriStringToUnixFilenameA
* @since 0.5.2
*/
int URI_FUNC(UriStringToWindowsFilename)(const URI_CHAR * uriString,
URI_CHAR * filename);
/**
* Calculates the number of characters needed to store the
* string representation of the given query list excluding the
* terminator. It is assumed that line breaks will be
* normalized to "%0D%0A".
*
* @param queryList <b>IN</b>: Query list to measure
* @param charsRequired <b>OUT</b>: Length of the string representation in characters <b>excluding</b> terminator
* @return Error code or 0 on success
*
* @see uriComposeQueryCharsRequiredExA
* @see uriComposeQueryA
* @since 0.7.0
*/
int URI_FUNC(ComposeQueryCharsRequired)(const URI_TYPE(QueryList) * queryList,
int * charsRequired);
/**
* Calculates the number of characters needed to store the
* string representation of the given query list excluding the
* terminator.
*
* @param queryList <b>IN</b>: Query list to measure
* @param charsRequired <b>OUT</b>: Length of the string representation in characters <b>excluding</b> terminator
* @param spaceToPlus <b>IN</b>: Whether to convert ' ' to '+' or not
* @param normalizeBreaks <b>IN</b>: Whether to convert CR and LF to CR-LF or not.
* @return Error code or 0 on success
*
* @see uriComposeQueryCharsRequiredA
* @see uriComposeQueryExA
* @since 0.7.0
*/
int URI_FUNC(ComposeQueryCharsRequiredEx)(const URI_TYPE(QueryList) * queryList,
int * charsRequired, UriBool spaceToPlus, UriBool normalizeBreaks);
/**
* Converts a query list structure back to a query string.
* The composed string does not start with '?',
* on the way ' ' is converted to '+' and line breaks are
* normalized to "%0D%0A".
*
* @param dest <b>OUT</b>: Output destination
* @param queryList <b>IN</b>: Query list to convert
* @param maxChars <b>IN</b>: Maximum number of characters to copy <b>including</b> terminator
* @param charsWritten <b>OUT</b>: Number of characters written, can be lower than maxChars even if the query list is too long!
* @return Error code or 0 on success
*
* @see uriComposeQueryExA
* @see uriComposeQueryMallocA
* @see uriComposeQueryCharsRequiredA
* @see uriDissectQueryMallocA
* @since 0.7.0
*/
int URI_FUNC(ComposeQuery)(URI_CHAR * dest,
const URI_TYPE(QueryList) * queryList, int maxChars, int * charsWritten);
/**
* Converts a query list structure back to a query string.
* The composed string does not start with '?'.
*
* @param dest <b>OUT</b>: Output destination
* @param queryList <b>IN</b>: Query list to convert
* @param maxChars <b>IN</b>: Maximum number of characters to copy <b>including</b> terminator
* @param charsWritten <b>OUT</b>: Number of characters written, can be lower than maxChars even if the query list is too long!
* @param spaceToPlus <b>IN</b>: Whether to convert ' ' to '+' or not
* @param normalizeBreaks <b>IN</b>: Whether to convert CR and LF to CR-LF or not.
* @return Error code or 0 on success
*
* @see uriComposeQueryA
* @see uriComposeQueryMallocExA
* @see uriComposeQueryCharsRequiredExA
* @see uriDissectQueryMallocExA
* @since 0.7.0
*/
int URI_FUNC(ComposeQueryEx)(URI_CHAR * dest,
const URI_TYPE(QueryList) * queryList, int maxChars, int * charsWritten,
UriBool spaceToPlus, UriBool normalizeBreaks);
/**
* Converts a query list structure back to a query string.
* Memory for this string is allocated internally.
* The composed string does not start with '?',
* on the way ' ' is converted to '+' and line breaks are
* normalized to "%0D%0A".
*
* @param dest <b>OUT</b>: Output destination
* @param queryList <b>IN</b>: Query list to convert
* @return Error code or 0 on success
*
* @see uriComposeQueryMallocExA
* @see uriComposeQueryA
* @see uriDissectQueryMallocA
* @since 0.7.0
*/
int URI_FUNC(ComposeQueryMalloc)(URI_CHAR ** dest,
const URI_TYPE(QueryList) * queryList);
/**
* Converts a query list structure back to a query string.
* Memory for this string is allocated internally.
* The composed string does not start with '?'.
*
* @param dest <b>OUT</b>: Output destination
* @param queryList <b>IN</b>: Query list to convert
* @param spaceToPlus <b>IN</b>: Whether to convert ' ' to '+' or not
* @param normalizeBreaks <b>IN</b>: Whether to convert CR and LF to CR-LF or not.
* @return Error code or 0 on success
*
* @see uriComposeQueryMallocA
* @see uriComposeQueryExA
* @see uriDissectQueryMallocExA
* @since 0.7.0
*/
int URI_FUNC(ComposeQueryMallocEx)(URI_CHAR ** dest,
const URI_TYPE(QueryList) * queryList,
UriBool spaceToPlus, UriBool normalizeBreaks);
/**
* Constructs a query list from the raw query string of a given URI.
* On the way '+' is converted back to ' ', line breaks are not modified.
*
* @param dest <b>OUT</b>: Output destination
* @param itemCount <b>OUT</b>: Number of items found, can be NULL
* @param first <b>IN</b>: Pointer to first character <b>after</b> '?'
* @param afterLast <b>IN</b>: Pointer to the character after the last one still in the query string
* @return Error code or 0 on success
*
* @see uriDissectQueryMallocExA
* @see uriComposeQueryA
* @see uriFreeQueryListA
* @since 0.7.0
*/
int URI_FUNC(DissectQueryMalloc)(URI_TYPE(QueryList) ** dest, int * itemCount,
const URI_CHAR * first, const URI_CHAR * afterLast);
/**
* Constructs a query list from the raw query string of a given URI.
*
* @param dest <b>OUT</b>: Output destination
* @param itemCount <b>OUT</b>: Number of items found, can be NULL
* @param first <b>IN</b>: Pointer to first character <b>after</b> '?'
* @param afterLast <b>IN</b>: Pointer to the character after the last one still in the query string
* @param plusToSpace <b>IN</b>: Whether to convert '+' to ' ' or not
* @param breakConversion <b>IN</b>: Line break conversion mode
* @return Error code or 0 on success
*
* @see uriDissectQueryMallocA
* @see uriComposeQueryExA
* @see uriFreeQueryListA
* @since 0.7.0
*/
int URI_FUNC(DissectQueryMallocEx)(URI_TYPE(QueryList) ** dest, int * itemCount,
const URI_CHAR * first, const URI_CHAR * afterLast,
UriBool plusToSpace, UriBreakConversion breakConversion);
/**
* Frees all memory associated with the given query list.
* The structure itself is freed as well.
*
* @param queryList <b>INOUT</b>: Query list to free
*
* @since 0.7.0
*/
void URI_FUNC(FreeQueryList)(URI_TYPE(QueryList) * queryList);
#ifdef __cplusplus
}
#endif
#endif
#endif

View File

@ -0,0 +1,188 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
* Copyright (C) 2007, Sebastian Pipping <webmaster@hartwork.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the <ORGANIZATION> nor the names of its
* contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* @file UriBase.h
* Holds definitions independent of the encoding pass.
*/
#ifndef URI_BASE_H
#define URI_BASE_H 1
/* Version helper macro */
#define URI_ANSI_TO_UNICODE(x) L##x
/* Version */
#define URI_VER_MAJOR 0
#define URI_VER_MINOR 7
#define URI_VER_RELEASE 5
#define URI_VER_SUFFIX_ANSI ""
#define URI_VER_SUFFIX_UNICODE URI_ANSI_TO_UNICODE(URI_VER_SUFFIX_ANSI)
/* More version helper macros */
#define URI_INT_TO_ANSI_HELPER(x) #x
#define URI_INT_TO_ANSI(x) URI_INT_TO_ANSI_HELPER(x)
#define URI_INT_TO_UNICODE_HELPER(x) URI_ANSI_TO_UNICODE(#x)
#define URI_INT_TO_UNICODE(x) URI_INT_TO_UNICODE_HELPER(x)
#define URI_VER_ANSI_HELPER(ma, mi, r, s) \
URI_INT_TO_ANSI(ma) "." \
URI_INT_TO_ANSI(mi) "." \
URI_INT_TO_ANSI(r) \
s
#define URI_VER_UNICODE_HELPER(ma, mi, r, s) \
URI_INT_TO_UNICODE(ma) L"." \
URI_INT_TO_UNICODE(mi) L"." \
URI_INT_TO_UNICODE(r) \
s
/* Full version strings */
#define URI_VER_ANSI URI_VER_ANSI_HELPER(URI_VER_MAJOR, URI_VER_MINOR, URI_VER_RELEASE, URI_VER_SUFFIX_ANSI)
#define URI_VER_UNICODE URI_VER_UNICODE_HELPER(URI_VER_MAJOR, URI_VER_MINOR, URI_VER_RELEASE, URI_VER_SUFFIX_UNICODE)
/* Unused parameter macro */
#ifdef __GNUC__
# define URI_UNUSED(x) unused_##x __attribute__((unused))
#else
# define URI_UNUSED(x) x
#endif
typedef int UriBool; /**< Boolean type */
#define URI_TRUE 1
#define URI_FALSE 0
/* Shared errors (returned as plain int; URI_SUCCESS is 0, every error is non-zero) */
#define URI_SUCCESS 0
#define URI_ERROR_SYNTAX 1 /* Parsed text violates expected format */
#define URI_ERROR_NULL 2 /* One of the params passed was NULL
although it mustn't be */
#define URI_ERROR_MALLOC 3 /* Requested memory could not be allocated */
#define URI_ERROR_OUTPUT_TOO_LARGE 4 /* Some output is too large for the receiving buffer */
#define URI_ERROR_NOT_IMPLEMENTED 8 /* The called function is not implemented yet */
#define URI_ERROR_RANGE_INVALID 9 /* The parameters passed contained invalid ranges */
/* Errors specific to ToString */
#define URI_ERROR_TOSTRING_TOO_LONG URI_ERROR_OUTPUT_TOO_LARGE /* Deprecated alias, test for URI_ERROR_OUTPUT_TOO_LARGE instead */
/* Errors specific to AddBaseUri */
#define URI_ERROR_ADDBASE_REL_BASE 5 /* Given base is not absolute */
/* Errors specific to RemoveBaseUri */
#define URI_ERROR_REMOVEBASE_REL_BASE 6 /* Given base is not absolute */
#define URI_ERROR_REMOVEBASE_REL_SOURCE 7 /* Presumably: given source is not absolute (the name says SOURCE; the previous comment duplicated the REL_BASE text) - TODO confirm */
#ifndef URI_DOXYGEN
# include <stdio.h> /* For NULL, snprintf */
# include <ctype.h> /* For wchar_t */
# include <string.h> /* For strlen, memset, memcpy */
# include <stdlib.h> /* For malloc */
#endif /* URI_DOXYGEN */
/**
* Holds an IPv4 address.
*/
typedef struct UriIp4Struct {
unsigned char data[4]; /**< Each octet in one byte */
} UriIp4; /**< @copydoc UriIp4Struct */
/**
* Holds an IPv6 address.
*/
typedef struct UriIp6Struct {
unsigned char data[16]; /**< Each quad in two bytes */
} UriIp6; /**< @copydoc UriIp6Struct */
/**
* Specifies a line break conversion mode
*/
typedef enum UriBreakConversionEnum {
URI_BR_TO_LF, /**< Convert to Unix line breaks ("\\x0a") */
URI_BR_TO_CRLF, /**< Convert to Windows line breaks ("\\x0d\\x0a") */
URI_BR_TO_CR, /**< Convert to Macintosh line breaks ("\\x0d") */
URI_BR_TO_UNIX = URI_BR_TO_LF, /**< @copydoc UriBreakConversionEnum::URI_BR_TO_LF */
URI_BR_TO_WINDOWS = URI_BR_TO_CRLF, /**< @copydoc UriBreakConversionEnum::URI_BR_TO_CRLF */
URI_BR_TO_MAC = URI_BR_TO_CR, /**< @copydoc UriBreakConversionEnum::URI_BR_TO_CR */
URI_BR_DONT_TOUCH /**< Copy line breaks unmodified */
} UriBreakConversion; /**< @copydoc UriBreakConversionEnum */
/**
* Specifies which component of a %URI has to be normalized.
*/
typedef enum UriNormalizationMaskEnum {
URI_NORMALIZED = 0, /**< Do not normalize anything */
URI_NORMALIZE_SCHEME = 1 << 0, /**< Normalize scheme (fix uppercase letters) */
URI_NORMALIZE_USER_INFO = 1 << 1, /**< Normalize user info (fix uppercase percent-encodings) */
URI_NORMALIZE_HOST = 1 << 2, /**< Normalize host (fix uppercase letters) */
URI_NORMALIZE_PATH = 1 << 3, /**< Normalize path (fix uppercase percent-encodings and redundant dot segments) */
URI_NORMALIZE_QUERY = 1 << 4, /**< Normalize query (fix uppercase percent-encodings) */
URI_NORMALIZE_FRAGMENT = 1 << 5 /**< Normalize fragment (fix uppercase percent-encodings) */
} UriNormalizationMask; /**< @copydoc UriNormalizationMaskEnum */
#endif /* URI_BASE_H */

View File

@ -0,0 +1,527 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
* Copyright (C) 2007, Sebastian Pipping <webmaster@hartwork.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the <ORGANIZATION> nor the names of its
* contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* What encodings are enabled? */
#include "UriDefsConfig.h"
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
# define URI_PASS_ANSI 1
# include "UriCommon.c"
# undef URI_PASS_ANSI
# define URI_PASS_UNICODE 1
# include "UriCommon.c"
# undef URI_PASS_UNICODE
#else
# ifdef URI_PASS_ANSI
# include "UriDefsAnsi.h"
# else
# include "UriDefsUnicode.h"
# include <wchar.h>
# endif
#ifndef URI_DOXYGEN
# include "Uri.h"
# include "UriCommon.h"
#endif
/* Shared constant strings; one instance exists per encoding pass. */
/* Non-NULL dummy target: empty path segments set both text.first and
 * text.afterLast to this pointer, giving a zero-length range. */
/*extern*/ const URI_CHAR * const URI_FUNC(SafeToPointTo) = _UT("X");
/* The "." segment text; FixAmbiguity points inserted segments at it. */
/*extern*/ const URI_CHAR * const URI_FUNC(ConstPwd) = _UT(".");
/* The ".." segment text. */
/*extern*/ const URI_CHAR * const URI_FUNC(ConstParent) = _UT("..");
/* Zero-fills the whole URI structure so every member starts out as 0/NULL. */
void URI_FUNC(ResetUri)(URI_TYPE(Uri) * uri) {
    memset(uri, 0, sizeof(*uri));
}
/*
 * Removes "." and ".." path segments from the given URI.
 *
 * Convenience wrapper around RemoveDotSegmentsEx that takes the
 * "path text is owned" flag from uri->owner. A NULL uri is treated
 * as success.
 */
UriBool URI_FUNC(RemoveDotSegments)(URI_TYPE(Uri) * uri,
        UriBool relative) {
    return (uri != NULL)
            ? URI_FUNC(RemoveDotSegmentsEx)(uri, relative, uri->owner)
            : URI_TRUE;
}
/*
 * Removes "." and ".." segments from the path segment list of a URI, in place.
 *
 * The singly linked segment list is walked from uri->pathHead. While walking,
 * each segment's "reserved" member is used as a back pointer to the previous
 * segment so that ".." can unlink its predecessor.
 *
 * uri        URI whose path is rewritten; NULL or an empty path is a no-op.
 * relative   If true, dot segments that are essential to a relative reference
 *            are kept: a leading "." followed by a segment containing ':',
 *            a leading "..", and a ".." that follows another "..".
 * pathOwned  If true, the text of every removed, non-empty segment is
 *            free()d together with the segment node.
 *
 * Returns URI_TRUE on success and URI_FALSE if allocating the replacement
 * empty segment fails.
 */
UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri,
UriBool relative, UriBool pathOwned) {
URI_TYPE(PathSegment) * walker;
if ((uri == NULL) || (uri->pathHead == NULL)) {
return URI_TRUE;
}
walker = uri->pathHead;
walker->reserved = NULL; /* Prev pointer */
do {
UriBool removeSegment = URI_FALSE;
/* Only segments of length 1 (".") or 2 ("..") can be dot segments */
int len = (int)(walker->text.afterLast - walker->text.first);
switch (len) {
case 1:
if ((walker->text.first)[0] == _UT('.')) {
/* "." segment -> remove if not essential */
URI_TYPE(PathSegment) * const prev = walker->reserved;
URI_TYPE(PathSegment) * const nextBackup = walker->next;
/* Is this dot segment essential? */
removeSegment = URI_TRUE;
/* A leading "." in a relative path is kept when the next segment contains ':' */
if (relative && (walker == uri->pathHead) && (walker->next != NULL)) {
const URI_CHAR * ch = walker->next->text.first;
for (; ch < walker->next->text.afterLast; ch++) {
if (*ch == _UT(':')) {
removeSegment = URI_FALSE;
break;
}
}
}
if (removeSegment) {
/* Last segment? */
if (walker->next != NULL) {
/* Not last segment */
walker->next->reserved = prev;
if (prev == NULL) {
/* First but not last segment */
uri->pathHead = walker->next;
} else {
/* Middle segment */
prev->next = walker->next;
}
if (pathOwned && (walker->text.first != walker->text.afterLast)) {
free((URI_CHAR *)walker->text.first);
}
free(walker);
} else {
/* Last segment */
if (pathOwned && (walker->text.first != walker->text.afterLast)) {
free((URI_CHAR *)walker->text.first);
}
if (prev == NULL) {
/* Last and first */
if (URI_FUNC(IsHostSet)(uri)) {
/* Replace "." with empty segment to represent trailing slash */
walker->text.first = URI_FUNC(SafeToPointTo);
walker->text.afterLast = URI_FUNC(SafeToPointTo);
} else {
/* No host: the path becomes empty altogether */
free(walker);
uri->pathHead = NULL;
uri->pathTail = NULL;
}
} else {
/* Last but not first, replace "." with empty segment to represent trailing slash */
walker->text.first = URI_FUNC(SafeToPointTo);
walker->text.afterLast = URI_FUNC(SafeToPointTo);
}
}
/* Continue with the segment that followed the removed one (NULL ends the loop) */
walker = nextBackup;
}
}
break;
case 2:
if (((walker->text.first)[0] == _UT('.'))
&& ((walker->text.first)[1] == _UT('.'))) {
/* Path ".." -> remove this and the previous segment */
URI_TYPE(PathSegment) * const prev = walker->reserved;
URI_TYPE(PathSegment) * prevPrev;
URI_TYPE(PathSegment) * const nextBackup = walker->next;
removeSegment = URI_TRUE;
/* In relative mode keep a leading ".." and a ".." that follows another ".." */
if (relative) {
if (prev == NULL) {
removeSegment = URI_FALSE;
} else if ((prev != NULL)
&& ((prev->text.afterLast - prev->text.first) == 2)
&& ((prev->text.first)[0] == _UT('.'))
&& ((prev->text.first)[1] == _UT('.'))) {
removeSegment = URI_FALSE;
}
}
if (removeSegment) {
if (prev != NULL) {
/* Not first segment */
prevPrev = prev->reserved;
if (prevPrev != NULL) {
/* Not even prev is the first one */
prevPrev->next = walker->next;
if (walker->next != NULL) {
walker->next->reserved = prevPrev;
} else {
/* Last segment -> insert "" segment to represent trailing slash, update tail */
URI_TYPE(PathSegment) * const segment = malloc(1 * sizeof(URI_TYPE(PathSegment)));
if (segment == NULL) {
/* Out of memory: release both unlinked segments before reporting failure. */
/* NOTE(review): uri->pathTail is not updated on this path - confirm callers discard the URI after a failure */
if (pathOwned && (walker->text.first != walker->text.afterLast)) {
free((URI_CHAR *)walker->text.first);
}
free(walker);
if (pathOwned && (prev->text.first != prev->text.afterLast)) {
free((URI_CHAR *)prev->text.first);
}
free(prev);
return URI_FALSE; /* Raises malloc error */
}
memset(segment, 0, sizeof(URI_TYPE(PathSegment)));
segment->text.first = URI_FUNC(SafeToPointTo);
segment->text.afterLast = URI_FUNC(SafeToPointTo);
prevPrev->next = segment;
uri->pathTail = segment;
}
/* Release the ".." segment and the segment it cancels out */
if (pathOwned && (walker->text.first != walker->text.afterLast)) {
free((URI_CHAR *)walker->text.first);
}
free(walker);
if (pathOwned && (prev->text.first != prev->text.afterLast)) {
free((URI_CHAR *)prev->text.first);
}
free(prev);
walker = nextBackup;
} else {
/* Prev is the first segment */
if (walker->next != NULL) {
uri->pathHead = walker->next;
walker->next->reserved = NULL;
if (pathOwned && (walker->text.first != walker->text.afterLast)) {
free((URI_CHAR *)walker->text.first);
}
free(walker);
} else {
/* Re-use segment for "" path segment to represent trailing slash, update tail */
URI_TYPE(PathSegment) * const segment = walker;
if (pathOwned && (segment->text.first != segment->text.afterLast)) {
free((URI_CHAR *)segment->text.first);
}
segment->text.first = URI_FUNC(SafeToPointTo);
segment->text.afterLast = URI_FUNC(SafeToPointTo);
uri->pathHead = segment;
uri->pathTail = segment;
}
if (pathOwned && (prev->text.first != prev->text.afterLast)) {
free((URI_CHAR *)prev->text.first);
}
free(prev);
walker = nextBackup;
}
} else {
/* ".." has no predecessor: only the ".." segment itself is dropped */
URI_TYPE(PathSegment) * const nextBackup = walker->next;
/* First segment -> update head pointer */
uri->pathHead = walker->next;
if (walker->next != NULL) {
walker->next->reserved = NULL;
} else {
/* Last segment -> update tail */
uri->pathTail = NULL;
}
if (pathOwned && (walker->text.first != walker->text.afterLast)) {
free((URI_CHAR *)walker->text.first);
}
free(walker);
walker = nextBackup;
}
}
}
break;
}
/* Segment was kept: record the back pointer for its successor and advance */
if (!removeSegment) {
if (walker->next != NULL) {
walker->next->reserved = walker;
} else {
/* Last segment -> update tail */
uri->pathTail = walker;
}
walker = walker->next;
}
} while (walker != NULL);
return URI_TRUE;
}
/*
 * Removes "." and ".." path segments, treating the path as absolute
 * (i.e. calls RemoveDotSegments with relative == URI_FALSE).
 */
UriBool URI_FUNC(RemoveDotSegmentsAbsolute)(URI_TYPE(Uri) * uri) {
    return URI_FUNC(RemoveDotSegments)(uri, URI_FALSE);
}
/*
 * Converts a single hexadecimal digit character to its numeric value.
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'; any other character yields 0.
 */
unsigned char URI_FUNC(HexdigToInt)(URI_CHAR hexdig) {
    if ((hexdig >= _UT('0')) && (hexdig <= _UT('9'))) {
        return (unsigned char)(hexdig - _UT('0'));
    }
    if ((hexdig >= _UT('a')) && (hexdig <= _UT('f'))) {
        return (unsigned char)(10 + (hexdig - _UT('a')));
    }
    if ((hexdig >= _UT('A')) && (hexdig <= _UT('F'))) {
        return (unsigned char)(10 + (hexdig - _UT('A')));
    }
    /* Not a hex digit */
    return 0;
}
URI_CHAR URI_FUNC(HexToLetter)(unsigned int value) {
/* Uppercase recommended in section 2.1. of RFC 3986 *
* http://tools.ietf.org/html/rfc3986#section-2.1 */
return URI_FUNC(HexToLetterEx)(value, URI_TRUE);
}
/*
 * Converts a value to its hexadecimal digit character.
 *
 * value      Number to convert; anything above 14 maps to 'F' / 'f'.
 * uppercase  Letters are uppercase only if this equals URI_TRUE exactly,
 *            otherwise lowercase letters are produced.
 */
URI_CHAR URI_FUNC(HexToLetterEx)(unsigned int value, UriBool uppercase) {
    static const URI_CHAR upperDigits[] = _UT("0123456789ABCDEF");
    static const URI_CHAR lowerDigits[] = _UT("0123456789abcdef");
    /* Clamp so that every out-of-range value lands on the last digit */
    const unsigned int index = (value < 15) ? value : 15;

    if (uppercase == URI_TRUE) {
        return upperDigits[index];
    }
    return lowerDigits[index];
}
/*
 * Tells whether the URI has a host component.
 *
 * A host counts as set when the host text or any of the host data
 * members (IPv4, IPv6, IPvFuture) is non-NULL. A NULL uri has no host.
 */
UriBool URI_FUNC(IsHostSet)(const URI_TYPE(Uri) * uri) {
    if (uri == NULL) {
        return URI_FALSE;
    }
    if (uri->hostText.first != NULL) {
        return URI_TRUE;
    }
    if (uri->hostData.ip4 != NULL) {
        return URI_TRUE;
    }
    if (uri->hostData.ip6 != NULL) {
        return URI_TRUE;
    }
    if (uri->hostData.ipFuture.first != NULL) {
        return URI_TRUE;
    }
    return URI_FALSE;
}
/*
 * Copies the path segment list of source into dest.
 *
 * A new list node is allocated for every segment, but the text ranges are
 * copied as pointers only, so dest shares the segment text with source.
 * The absolutePath flag is copied as well.
 *
 * Returns URI_TRUE on success. On allocation failure URI_FALSE is returned;
 * the nodes created so far stay attached to dest as a NULL-terminated list.
 */
UriBool URI_FUNC(CopyPath)(URI_TYPE(Uri) * dest,
        const URI_TYPE(Uri) * source) {
    const URI_TYPE(PathSegment) * src = source->pathHead;
    URI_TYPE(PathSegment) * last = NULL;

    if (src == NULL) {
        /* Source has no path at all */
        dest->pathHead = NULL;
        dest->pathTail = NULL;
        dest->absolutePath = source->absolutePath;
        return URI_TRUE;
    }

    for (; src != NULL; src = src->next) {
        URI_TYPE(PathSegment) * const node = malloc(sizeof(URI_TYPE(PathSegment)));
        if (node == NULL) {
            /* Terminate what has been built so far */
            if (last != NULL) {
                last->next = NULL;
            }
            return URI_FALSE; /* Raises malloc error */
        }

        /* Share the text range, do not duplicate the characters */
        node->text = src->text;
        if (last == NULL) {
            dest->pathHead = node;
        } else {
            last->next = node;
        }
        last = node;
    }

    dest->pathTail = last;
    last->next = NULL;
    dest->absolutePath = source->absolutePath;
    return URI_TRUE;
}
/*
 * Copies the authority part (user info, host, port) of source into dest.
 *
 * Text ranges are copied as pointers only. IPv4 / IPv6 host data is
 * duplicated into freshly allocated storage; the host data members that do
 * not apply are set to NULL.
 *
 * Returns URI_TRUE on success and URI_FALSE if an allocation fails, in which
 * case the port text has not been copied.
 */
UriBool URI_FUNC(CopyAuthority)(URI_TYPE(Uri) * dest,
        const URI_TYPE(Uri) * source) {
    /* Plain range copies, the text itself is shared */
    dest->userInfo = source->userInfo;
    dest->hostText = source->hostText;

    if (source->hostData.ip4 != NULL) {
        /* IPv4 host */
        UriIp4 * const ip4Copy = malloc(sizeof(UriIp4));
        dest->hostData.ip4 = ip4Copy;
        if (ip4Copy == NULL) {
            return URI_FALSE; /* Raises malloc error */
        }
        *ip4Copy = *(source->hostData.ip4);
        dest->hostData.ip6 = NULL;
        dest->hostData.ipFuture.first = NULL;
        dest->hostData.ipFuture.afterLast = NULL;
    } else if (source->hostData.ip6 != NULL) {
        /* IPv6 host */
        UriIp6 * ip6Copy;
        dest->hostData.ip4 = NULL;
        ip6Copy = malloc(sizeof(UriIp6));
        dest->hostData.ip6 = ip6Copy;
        if (ip6Copy == NULL) {
            return URI_FALSE; /* Raises malloc error */
        }
        *ip6Copy = *(source->hostData.ip6);
        dest->hostData.ipFuture.first = NULL;
        dest->hostData.ipFuture.afterLast = NULL;
    } else {
        /* Neither IPv4 nor IPv6: take over the IPvFuture range as is */
        dest->hostData.ip4 = NULL;
        dest->hostData.ip6 = NULL;
        dest->hostData.ipFuture = source->hostData.ipFuture;
    }

    dest->portText = source->portText;
    return URI_TRUE;
}
/*
 * Prepends a "." segment to the path when its leading segments are empty:
 *   - absolute path whose first segment is empty, or
 *   - relative path whose first and second segments are both empty.
 * Any other path is left untouched.
 *
 * Returns URI_TRUE on success (including "nothing to do") and URI_FALSE if
 * the new segment could not be allocated.
 */
UriBool URI_FUNC(FixAmbiguity)(URI_TYPE(Uri) * uri) {
    URI_TYPE(PathSegment) * const head = uri->pathHead;
    URI_TYPE(PathSegment) * dotSegment;
    UriBool ambiguous = URI_FALSE;

    if ((head != NULL) && (head->text.afterLast == head->text.first)) {
        if (uri->absolutePath) {
            /* Case 1: absolute path, empty first segment */
            ambiguous = URI_TRUE;
        } else if ((head->next != NULL)
                && (head->next->text.afterLast == head->next->text.first)) {
            /* Case 2: relative path, empty first and second segment */
            ambiguous = URI_TRUE;
        }
    }
    if (!ambiguous) {
        return URI_TRUE;
    }

    dotSegment = malloc(1 * sizeof(URI_TYPE(PathSegment)));
    if (dotSegment == NULL) {
        return URI_FALSE; /* Raises malloc error */
    }

    /* Link the "." segment in as the new head; its text is the shared constant */
    dotSegment->next = uri->pathHead;
    dotSegment->text.first = URI_FUNC(ConstPwd);
    dotSegment->text.afterLast = URI_FUNC(ConstPwd) + 1;
    uri->pathHead = dotSegment;
    return URI_TRUE;
}
/*
 * Drops the path entirely when it consists of exactly one empty segment
 * and the URI has neither an absolute path nor a host.
 */
void URI_FUNC(FixEmptyTrailSegment)(URI_TYPE(Uri) * uri) {
    URI_TYPE(PathSegment) * const only = uri->pathHead;

    if (uri->absolutePath || URI_FUNC(IsHostSet)(uri)) {
        return;
    }
    if ((only == NULL) || (only->next != NULL)) {
        /* No path, or more than one segment */
        return;
    }
    if (only->text.first != only->text.afterLast) {
        /* The single segment is not empty */
        return;
    }

    free(only);
    uri->pathHead = NULL;
    uri->pathTail = NULL;
}
#endif

View File

@ -0,0 +1,96 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
* Copyright (C) 2007, Sebastian Pipping <webmaster@hartwork.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the <ORGANIZATION> nor the names of its
* contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if (defined(URI_PASS_ANSI) && !defined(URI_COMMON_H_ANSI)) \
|| (defined(URI_PASS_UNICODE) && !defined(URI_COMMON_H_UNICODE)) \
|| (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* What encodings are enabled? */
#include "UriDefsConfig.h"
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
# define URI_PASS_ANSI 1
# include "UriCommon.h"
# undef URI_PASS_ANSI
# define URI_PASS_UNICODE 1
# include "UriCommon.h"
# undef URI_PASS_UNICODE
/* Only one pass for each encoding */
#elif (defined(URI_PASS_ANSI) && !defined(URI_COMMON_H_ANSI) \
&& defined(URI_ENABLE_ANSI)) || (defined(URI_PASS_UNICODE) \
&& !defined(URI_COMMON_H_UNICODE) && defined(URI_ENABLE_UNICODE))
# ifdef URI_PASS_ANSI
# define URI_COMMON_H_ANSI 1
# include "UriDefsAnsi.h"
# else
# define URI_COMMON_H_UNICODE 1
# include "UriDefsUnicode.h"
# endif
/* Used to point to from empty path segments.
* X.first and X.afterLast must be the same non-NULL value then. */
extern const URI_CHAR * const URI_FUNC(SafeToPointTo);
/* Constant "." string */
extern const URI_CHAR * const URI_FUNC(ConstPwd);
/* Constant ".." string */
extern const URI_CHAR * const URI_FUNC(ConstParent);
/* Zero-fills the given URI structure */
void URI_FUNC(ResetUri)(URI_TYPE(Uri) * uri);
/* Removes "." and ".." path segments; same as RemoveDotSegments with relative == URI_FALSE */
UriBool URI_FUNC(RemoveDotSegmentsAbsolute)(URI_TYPE(Uri) * uri);
/* Removes "." and ".." path segments; uses uri->owner as the pathOwned flag. NULL uri is a no-op */
UriBool URI_FUNC(RemoveDotSegments)(URI_TYPE(Uri) * uri, UriBool relative);
/* Removes "." and ".." path segments; segment text is freed only if pathOwned is set.
 * Returns URI_FALSE on allocation failure, URI_TRUE otherwise */
UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri,
UriBool relative, UriBool pathOwned);
/* Numeric value of a hex digit character; 0 for any non-hex character */
unsigned char URI_FUNC(HexdigToInt)(URI_CHAR hexdig);
/* Uppercase hex digit character for the given value */
URI_CHAR URI_FUNC(HexToLetter)(unsigned int value);
/* Hex digit character for the given value in the requested letter case */
URI_CHAR URI_FUNC(HexToLetterEx)(unsigned int value, UriBool uppercase);
/* True if host text or any host data (IPv4, IPv6, IPvFuture) is set */
UriBool URI_FUNC(IsHostSet)(const URI_TYPE(Uri) * uri);
/* Copies the path segment list (nodes only, text is shared); URI_FALSE on allocation failure */
UriBool URI_FUNC(CopyPath)(URI_TYPE(Uri) * dest, const URI_TYPE(Uri) * source);
/* Copies user info, host and port; URI_FALSE on allocation failure */
UriBool URI_FUNC(CopyAuthority)(URI_TYPE(Uri) * dest, const URI_TYPE(Uri) * source);
/* Prepends a "." segment if the path starts with empty segment(s); URI_FALSE on allocation failure */
UriBool URI_FUNC(FixAmbiguity)(URI_TYPE(Uri) * uri);
/* Drops a path made of a single empty segment when the URI has no host and no absolute path */
void URI_FUNC(FixEmptyTrailSegment)(URI_TYPE(Uri) * uri);
#endif
#endif

View File

@ -0,0 +1,191 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
* Copyright (C) 2007, Sebastian Pipping <webmaster@hartwork.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the <ORGANIZATION> nor the names of its
* contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* What encodings are enabled? */
#include "UriDefsConfig.h"
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
# define URI_PASS_ANSI 1
# include "UriCompare.c"
# undef URI_PASS_ANSI
# define URI_PASS_UNICODE 1
# include "UriCompare.c"
# undef URI_PASS_UNICODE
#else
# ifdef URI_PASS_ANSI
# include "UriDefsAnsi.h"
# else
# include "UriDefsUnicode.h"
# include <wchar.h>
# endif
#ifndef URI_DOXYGEN
# include "Uri.h"
# include "UriIp4.h"
# include "UriCommon.h"
#endif
static int URI_FUNC(CompareRange)(const URI_TYPE(TextRange) * a,
        const URI_TYPE(TextRange) * b);

/*
 * Compares two text ranges, strcmp-style.
 *
 * Returns 0 if both ranges hold equal text, a negative value if a sorts
 * before b and a positive value if a sorts after b. Callers in this file
 * only test the result for non-zero ("different").
 *
 * Ordering rules:
 *   - A NULL range pointer equals another NULL range pointer and sorts
 *     before any non-NULL one.
 *   - An unset range (first == NULL) equals another unset range and sorts
 *     before any set range, including a set but empty one.
 *   - Otherwise the shorter range sorts first; ranges of equal length are
 *     compared character by character.
 */
static URI_INLINE int URI_FUNC(CompareRange)(const URI_TYPE(TextRange) * a,
        const URI_TYPE(TextRange) * b) {
    int diff;

    /* NOTE: Both NULL means equal, i.e. 0 (the old code returned 1 here
     * and 0 when only one side was NULL, which callers read the wrong way) */
    if ((a == NULL) || (b == NULL)) {
        return ((a == NULL) ? 0 : 1) - ((b == NULL) ? 0 : 1);
    }

    /* Unset ranges must never reach the string comparison below */
    if ((a->first == NULL) || (b->first == NULL)) {
        return ((a->first == NULL) ? 0 : 1) - ((b->first == NULL) ? 0 : 1);
    }

    diff = ((int)(a->afterLast - a->first) - (int)(b->afterLast - b->first));
    if (diff > 0) {
        return 1;
    } else if (diff < 0) {
        return -1;
    }

    /* Same length: compare the characters and normalise to -1 / 0 / 1 */
    diff = URI_STRNCMP(a->first, b->first, (size_t)(a->afterLast - a->first));
    if (diff > 0) {
        return 1;
    } else if (diff < 0) {
        return -1;
    }
    return 0;
}
/*
 * Checks two URIs for equality, component by component.
 *
 * Compared in order: scheme, absolutePath flag (only when a has no scheme),
 * user info, host (IPv4 / IPv6 / IPvFuture data, or the host text when no
 * host data is present), port, every path segment, query and fragment.
 *
 * Returns URI_TRUE if all components match, URI_FALSE otherwise. Two NULL
 * pointers are equal; a NULL pointer never equals a non-NULL one.
 */
UriBool URI_FUNC(EqualsUri)(const URI_TYPE(Uri) * a,
        const URI_TYPE(Uri) * b) {
    const URI_TYPE(PathSegment) * segA;
    const URI_TYPE(PathSegment) * segB;

    /* NOTE: Both NULL means equal! */
    if (a == NULL) {
        return (b == NULL) ? URI_TRUE : URI_FALSE;
    }
    if (b == NULL) {
        return URI_FALSE;
    }

    /* scheme */
    if (URI_FUNC(CompareRange)(&(a->scheme), &(b->scheme)) != 0) {
        return URI_FALSE;
    }

    /* absolutePath, only looked at when a has no scheme */
    if ((a->scheme.first == NULL) && (a->absolutePath != b->absolutePath)) {
        return URI_FALSE;
    }

    /* userInfo */
    if (URI_FUNC(CompareRange)(&(a->userInfo), &(b->userInfo)) != 0) {
        return URI_FALSE;
    }

    /* Host: both sides must use the same kind(s) of host data */
    if ((a->hostData.ip4 == NULL) != (b->hostData.ip4 == NULL)) {
        return URI_FALSE;
    }
    if ((a->hostData.ip6 == NULL) != (b->hostData.ip6 == NULL)) {
        return URI_FALSE;
    }
    if ((a->hostData.ipFuture.first == NULL)
            != (b->hostData.ipFuture.first == NULL)) {
        return URI_FALSE;
    }

    /* Host: compare whatever host data is present */
    if ((a->hostData.ip4 != NULL)
            && (memcmp(a->hostData.ip4->data, b->hostData.ip4->data, 4) != 0)) {
        return URI_FALSE;
    }
    if ((a->hostData.ip6 != NULL)
            && (memcmp(a->hostData.ip6->data, b->hostData.ip6->data, 16) != 0)) {
        return URI_FALSE;
    }
    if ((a->hostData.ipFuture.first != NULL)
            && (URI_FUNC(CompareRange)(&(a->hostData.ipFuture),
                    &(b->hostData.ipFuture)) != 0)) {
        return URI_FALSE;
    }

    /* Host: without any host data the host text decides */
    if ((a->hostData.ip4 == NULL)
            && (a->hostData.ip6 == NULL)
            && (a->hostData.ipFuture.first == NULL)
            && (URI_FUNC(CompareRange)(&(a->hostText), &(b->hostText)) != 0)) {
        return URI_FALSE;
    }

    /* portText */
    if (URI_FUNC(CompareRange)(&(a->portText), &(b->portText)) != 0) {
        return URI_FALSE;
    }

    /* Path: walk both segment lists in lockstep */
    segA = a->pathHead;
    segB = b->pathHead;
    if ((segA == NULL) != (segB == NULL)) {
        return URI_FALSE;
    }
    while (segA != NULL) {
        if (URI_FUNC(CompareRange)(&(segA->text), &(segB->text)) != 0) {
            return URI_FALSE;
        }
        /* One list ending before the other means different paths */
        if ((segA->next == NULL) != (segB->next == NULL)) {
            return URI_FALSE;
        }
        segA = segA->next;
        segB = segB->next;
    }

    /* query */
    if (URI_FUNC(CompareRange)(&(a->query), &(b->query)) != 0) {
        return URI_FALSE;
    }

    /* fragment */
    if (URI_FUNC(CompareRange)(&(a->fragment), &(b->fragment)) != 0) {
        return URI_FALSE;
    }

    return URI_TRUE; /* Equal */
}
#endif

View File

@ -0,0 +1,82 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
* Copyright (C) 2007, Sebastian Pipping <webmaster@hartwork.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the <ORGANIZATION> nor the names of its
* contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
 * @file UriDefsAnsi.h
 * Macro bindings for the ANSI (narrow character) pass.
 * NOTE: This header is included N times, not once, which is why it
 * carries no include guard of its own.
 */

/* Shared configuration; that header has its own include guard */
#include "UriDefsConfig.h"

/* Forget whatever bindings a previous pass installed */
#undef URI_CHAR
#undef _UT
#undef URI_FUNC
#undef URI_TYPE
#undef URI_STRLEN
#undef URI_STRCPY
#undef URI_STRCMP
#undef URI_STRNCMP
#undef URI_SNPRINTF

/* Character type and literal wrapper (narrow literals stay untouched) */
#define URI_CHAR char
#define _UT(x) x

/* Name mangling: functions become uri<Name>A, types become Uri<Name>A */
#define URI_FUNC(x) uri##x##A
#define URI_TYPE(x) Uri##x##A

/* String helpers operating on char */
#define URI_STRLEN strlen
#define URI_STRCPY strcpy
#define URI_STRCMP strcmp
#define URI_STRNCMP strncmp

/* TODO Remove on next source-compatibility break */
#if (defined(_WIN32) || defined(WIN32) || defined(__WIN32__))
# define URI_SNPRINTF _snprintf
#else
# define URI_SNPRINTF snprintf
#endif

View File

@ -0,0 +1,105 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
* Copyright (C) 2007, Sebastian Pipping <webmaster@hartwork.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the <ORGANIZATION> nor the names of its
* contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
 * @file UriDefsConfig.h
 * Adjusts the internal configuration after processing external definitions.
 *
 * Inputs (may be defined by the user before inclusion):
 *   URI_NO_ANSI, URI_NO_UNICODE  - disable one of the two encoding passes
 *   URI_DOXYGEN, URI_SIZEDOWN    - disable forced inlining
 *
 * Outputs (always recomputed here, external values are discarded):
 *   URI_ENABLE_ANSI, URI_ENABLE_UNICODE, URI_INLINE
 */
#ifndef URI_DEFS_CONFIG_H
#define URI_DEFS_CONFIG_H 1

/* Deny external overriding */
#undef URI_ENABLE_ANSI /* Internal for !URI_NO_ANSI */
#undef URI_ENABLE_UNICODE /* Internal for !URI_NO_UNICODE */

/* Encoding */
#ifdef URI_NO_ANSI
# ifdef URI_NO_UNICODE
/* No encoding at all */
#  error URI_NO_ANSI and URI_NO_UNICODE cannot go together.
# else
/* Unicode only */
#  define URI_ENABLE_UNICODE 1
# endif
#else
# ifdef URI_NO_UNICODE
/* ANSI only */
#  define URI_ENABLE_ANSI 1
# else
/* Both ANSI and Unicode */
#  define URI_ENABLE_ANSI 1
#  define URI_ENABLE_UNICODE 1
# endif
#endif

/* Function inlining, not ANSI/ISO C! */
#if (defined(URI_DOXYGEN) || defined(URI_SIZEDOWN))
# define URI_INLINE
#elif defined(__INTEL_COMPILER)
/* Intel C/C++ */
/* http://predef.sourceforge.net/precomp.html#sec20 */
/* http://www.intel.com/support/performancetools/c/windows/sb/CS-007751.htm#2 */
/* The keyword is spelled __forceinline (same as MSVC); "__force_inline" */
/* is not recognized and would break every use of URI_INLINE. */
# define URI_INLINE __forceinline
#elif defined(_MSC_VER)
/* Microsoft Visual C++ */
/* http://predef.sourceforge.net/precomp.html#sec32 */
/* http://msdn2.microsoft.com/en-us/library/ms882281.aspx */
# define URI_INLINE __forceinline
#elif (defined(__GNUC__) && (__GNUC__ >= 4))
/* GCC C/C++ 4.x.x */
/* http://predef.sourceforge.net/precomp.html#sec13 */
/* NOTE: without an accompanying "inline" GCC emits -Wattributes */
/* warnings for this attribute; kept as is for compatibility. */
# define URI_INLINE __attribute__((always_inline))
#elif (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L))
/* C99, "inline" is a keyword */
# define URI_INLINE inline
#else
/* No inlining */
# define URI_INLINE
#endif

#endif /* URI_DEFS_CONFIG_H */

View File

@ -0,0 +1,82 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
* Copyright (C) 2007, Sebastian Pipping <webmaster@hartwork.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the <ORGANIZATION> nor the names of its
* contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
 * @file UriDefsUnicode.h
 * Macro bindings for the Unicode (wide character) pass.
 * NOTE: This header is included N times, not once, which is why it
 * carries no include guard of its own.
 */

/* Shared configuration; that header has its own include guard */
#include "UriDefsConfig.h"

/* Forget whatever bindings a previous pass installed */
#undef URI_CHAR
#undef _UT
#undef URI_FUNC
#undef URI_TYPE
#undef URI_STRLEN
#undef URI_STRCPY
#undef URI_STRCMP
#undef URI_STRNCMP
#undef URI_SNPRINTF

/* Character type and literal wrapper (literals get the L prefix) */
#define URI_CHAR wchar_t
#define _UT(x) L##x

/* Name mangling: functions become uri<Name>W, types become Uri<Name>W */
#define URI_FUNC(x) uri##x##W
#define URI_TYPE(x) Uri##x##W

/* String helpers operating on wchar_t */
#define URI_STRLEN wcslen
#define URI_STRCPY wcscpy
#define URI_STRCMP wcscmp
#define URI_STRNCMP wcsncmp

/* TODO Remove on next source-compatibility break */
#if (defined(_WIN32) || defined(WIN32) || defined(__WIN32__))
# define URI_SNPRINTF _snwprintf
#else
# define URI_SNPRINTF swprintf
#endif

View File

@ -0,0 +1,449 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
* Copyright (C) 2007, Sebastian Pipping <webmaster@hartwork.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the <ORGANIZATION> nor the names of its
* contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* What encodings are enabled? */
#include "UriDefsConfig.h"
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
# define URI_PASS_ANSI 1
# include "UriEscape.c"
# undef URI_PASS_ANSI
# define URI_PASS_UNICODE 1
# include "UriEscape.c"
# undef URI_PASS_UNICODE
#else
# ifdef URI_PASS_ANSI
# include "UriDefsAnsi.h"
# else
# include "UriDefsUnicode.h"
# include <wchar.h>
# endif
#ifndef URI_DOXYGEN
# include "Uri.h"
# include "UriCommon.h"
#endif
/*
 * Escapes a zero-terminated string. Forwards to EscapeEx with no
 * explicit end pointer, so the input is read up to its terminator.
 * Returns whatever EscapeEx returns.
 */
URI_CHAR * URI_FUNC(Escape)(const URI_CHAR * in, URI_CHAR * out,
		UriBool spaceToPlus, UriBool normalizeBreaks) {
	const URI_CHAR * const noExplicitEnd = NULL;

	return URI_FUNC(EscapeEx)(in, noExplicitEnd, out,
			spaceToPlus, normalizeBreaks);
}
/*
 * Percent-encodes the characters in [inFirst, inAfterLast) into out.
 *
 * inFirst         start of the input; NULL yields an empty output string
 * inAfterLast     end of the input, or NULL to stop at the zero terminator
 *                 (a zero terminator inside the range stops the scan as well)
 * out             destination; must be non-NULL and must not equal inFirst
 * spaceToPlus     write ' ' as "+" instead of "%20"
 * normalizeBreaks write CR, LF and CR+LF uniformly as "%0D%0A"
 *
 * Returns a pointer to the zero terminator written to out, or NULL when
 * out is NULL or aliases inFirst. No bounds checking is done on out.
 */
URI_CHAR * URI_FUNC(EscapeEx)(const URI_CHAR * inFirst,
		const URI_CHAR * inAfterLast, URI_CHAR * out,
		UriBool spaceToPlus, UriBool normalizeBreaks) {
	const URI_CHAR * src = inFirst;
	URI_CHAR * dst = out;
	UriBool afterCr = URI_FALSE;

	/* No destination, or destination identical to the source */
	if ((out == NULL) || (inFirst == out)) {
		return NULL;
	}

	/* No source at all: hand back an empty string */
	if (inFirst == NULL) {
		out[0] = _UT('\0');
		return out;
	}

	for (;; src++) {
		/* Becomes the "previous char was CR" state of the next round */
		UriBool sawCr = URI_FALSE;

		/* Explicit end reached (checked first) or terminator found */
		if (((inAfterLast != NULL) && (src >= inAfterLast))
				|| (src[0] == _UT('\0'))) {
			dst[0] = _UT('\0');
			return dst;
		}

		switch (src[0]) {
		case _UT(' '):
			if (spaceToPlus) {
				dst[0] = _UT('+');
				dst += 1;
			} else {
				dst[0] = _UT('%');
				dst[1] = _UT('2');
				dst[2] = _UT('0');
				dst += 3;
			}
			break;

		/* ALPHA */
		case _UT('a'): case _UT('b'): case _UT('c'): case _UT('d'):
		case _UT('e'): case _UT('f'): case _UT('g'): case _UT('h'):
		case _UT('i'): case _UT('j'): case _UT('k'): case _UT('l'):
		case _UT('m'): case _UT('n'): case _UT('o'): case _UT('p'):
		case _UT('q'): case _UT('r'): case _UT('s'): case _UT('t'):
		case _UT('u'): case _UT('v'): case _UT('w'): case _UT('x'):
		case _UT('y'): case _UT('z'):
		case _UT('A'): case _UT('B'): case _UT('C'): case _UT('D'):
		case _UT('E'): case _UT('F'): case _UT('G'): case _UT('H'):
		case _UT('I'): case _UT('J'): case _UT('K'): case _UT('L'):
		case _UT('M'): case _UT('N'): case _UT('O'): case _UT('P'):
		case _UT('Q'): case _UT('R'): case _UT('S'): case _UT('T'):
		case _UT('U'): case _UT('V'): case _UT('W'): case _UT('X'):
		case _UT('Y'): case _UT('Z'):
		/* DIGIT */
		case _UT('0'): case _UT('1'): case _UT('2'): case _UT('3'):
		case _UT('4'): case _UT('5'): case _UT('6'): case _UT('7'):
		case _UT('8'): case _UT('9'):
		/* "-" / "." / "_" / "~" */
		case _UT('-'): case _UT('.'): case _UT('_'): case _UT('~'):
			/* Unreserved: copy unmodified */
			dst[0] = src[0];
			dst += 1;
			break;

		case _UT('\x0a'):
			if (!normalizeBreaks) {
				dst[0] = _UT('%');
				dst[1] = _UT('0');
				dst[2] = _UT('A');
				dst += 3;
			} else if (!afterCr) {
				/* Lone LF becomes a full CR+LF; an LF right after */
				/* a CR was already covered when the CR was seen   */
				dst[0] = _UT('%');
				dst[1] = _UT('0');
				dst[2] = _UT('D');
				dst[3] = _UT('%');
				dst[4] = _UT('0');
				dst[5] = _UT('A');
				dst += 6;
			}
			break;

		case _UT('\x0d'):
			dst[0] = _UT('%');
			dst[1] = _UT('0');
			dst[2] = _UT('D');
			dst += 3;
			if (normalizeBreaks) {
				/* Complete the pair right away */
				dst[0] = _UT('%');
				dst[1] = _UT('0');
				dst[2] = _UT('A');
				dst += 3;
			}
			sawCr = URI_TRUE;
			break;

		default:
			/* Percent encode (only the low 8 bits of the char are used) */
			{
				const unsigned char code = (unsigned char)src[0];
				dst[0] = _UT('%');
				dst[1] = URI_FUNC(HexToLetter)(code >> 4);
				dst[2] = URI_FUNC(HexToLetter)(code & 0x0f);
				dst += 3;
			}
			break;
		}

		afterCr = sawCr;
	}
}
/*
 * Unescapes a string in place with default settings: '+' is left
 * untouched and decoded line breaks are not converted.
 * Returns whatever UnescapeInPlaceEx returns.
 */
const URI_CHAR * URI_FUNC(UnescapeInPlace)(URI_CHAR * inout) {
	const UriBool plusToSpace = URI_FALSE;

	return URI_FUNC(UnescapeInPlaceEx)(inout, plusToSpace,
			URI_BR_DONT_TOUCH);
}
/*
 * Decodes percent groups ("%XX") of a zero-terminated string in place.
 *
 * inout            string to rewrite; NULL makes the function return NULL
 * plusToSpace      when true, every '+' is replaced by ' '
 * breakConversion  how decoded CR (13) and LF (10) are written back;
 *                  with the TO_LF/TO_CRLF/TO_CR modes an LF that directly
 *                  follows a decoded CR is dropped
 *
 * Both cursors start at inout; the write cursor never gets ahead of the
 * read cursor because a three-char group yields at most two chars.
 * Returns a pointer to the terminator of the rewritten string.
 */
const URI_CHAR * URI_FUNC(UnescapeInPlaceEx)(URI_CHAR * inout,
UriBool plusToSpace, UriBreakConversion breakConversion) {
URI_CHAR * read = inout;
URI_CHAR * write = inout;
UriBool prevWasCr = URI_FALSE;
if (inout == NULL) {
return NULL;
}
for (;;) {
switch (read[0]) {
case _UT('\0'):
/* Terminate only if the string shrank; otherwise the */
/* terminator is already in place at this position */
if (read > write) {
write[0] = _UT('\0');
}
return write;
case _UT('%'):
/* First char after '%' must be a hex digit ... */
switch (read[1]) {
case _UT('0'):
case _UT('1'):
case _UT('2'):
case _UT('3'):
case _UT('4'):
case _UT('5'):
case _UT('6'):
case _UT('7'):
case _UT('8'):
case _UT('9'):
case _UT('a'):
case _UT('b'):
case _UT('c'):
case _UT('d'):
case _UT('e'):
case _UT('f'):
case _UT('A'):
case _UT('B'):
case _UT('C'):
case _UT('D'):
case _UT('E'):
case _UT('F'):
/* ... and so must the second one */
switch (read[2]) {
case _UT('0'):
case _UT('1'):
case _UT('2'):
case _UT('3'):
case _UT('4'):
case _UT('5'):
case _UT('6'):
case _UT('7'):
case _UT('8'):
case _UT('9'):
case _UT('a'):
case _UT('b'):
case _UT('c'):
case _UT('d'):
case _UT('e'):
case _UT('f'):
case _UT('A'):
case _UT('B'):
case _UT('C'):
case _UT('D'):
case _UT('E'):
case _UT('F'):
{
/* Percent group found */
/* NOTE(review): HexdigToInt is defined elsewhere; presumably */
/* it maps a hex digit char to its value 0..15 - confirm */
const unsigned char left = URI_FUNC(HexdigToInt)(read[1]);
const unsigned char right = URI_FUNC(HexdigToInt)(read[2]);
const int code = 16 * left + right;
switch (code) {
case 10:
/* Decoded LF */
switch (breakConversion) {
case URI_BR_TO_LF:
if (!prevWasCr) {
write[0] = (URI_CHAR)10;
write++;
}
break;
case URI_BR_TO_CRLF:
if (!prevWasCr) {
write[0] = (URI_CHAR)13;
write[1] = (URI_CHAR)10;
write += 2;
}
break;
case URI_BR_TO_CR:
if (!prevWasCr) {
write[0] = (URI_CHAR)13;
write++;
}
break;
case URI_BR_DONT_TOUCH:
default:
write[0] = (URI_CHAR)10;
write++;
}
prevWasCr = URI_FALSE;
break;
case 13:
/* Decoded CR: emits the complete converted break right away */
switch (breakConversion) {
case URI_BR_TO_LF:
write[0] = (URI_CHAR)10;
write++;
break;
case URI_BR_TO_CRLF:
write[0] = (URI_CHAR)13;
write[1] = (URI_CHAR)10;
write += 2;
break;
case URI_BR_TO_CR:
write[0] = (URI_CHAR)13;
write++;
break;
case URI_BR_DONT_TOUCH:
default:
write[0] = (URI_CHAR)13;
write++;
}
prevWasCr = URI_TRUE;
break;
default:
/* Any other value is stored as is (this includes 0 for "%00") */
write[0] = (URI_CHAR)(code);
write++;
prevWasCr = URI_FALSE;
}
/* Skip the three chars of the consumed group */
read += 3;
}
break;
default:
/* '%' plus one hex digit only: copy both chars unmodified; */
/* the third char is examined by the next iteration */
if (read > write) {
write[0] = read[0];
write[1] = read[1];
}
read += 2;
write += 2;
prevWasCr = URI_FALSE;
}
break;
default:
/* '%' not followed by a hex digit: copy the '%' unmodified; */
/* the following char is examined by the next iteration */
if (read > write) {
write[0] = read[0];
}
read++;
write++;
prevWasCr = URI_FALSE;
}
break;
case _UT('+'):
if (plusToSpace) {
/* Convert '+' to ' ' */
write[0] = _UT(' ');
} else {
/* Copy one char unmodified */
if (read > write) {
write[0] = read[0];
}
}
read++;
write++;
prevWasCr = URI_FALSE;
break;
default:
/* Copy one char unmodified */
if (read > write) {
write[0] = read[0];
}
read++;
write++;
prevWasCr = URI_FALSE;
}
}
}
#endif

View File

@ -0,0 +1,182 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
* Copyright (C) 2007, Sebastian Pipping <webmaster@hartwork.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the <ORGANIZATION> nor the names of its
* contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* What encodings are enabled? */
#include "UriDefsConfig.h"
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
# define URI_PASS_ANSI 1
# include "UriFile.c"
# undef URI_PASS_ANSI
# define URI_PASS_UNICODE 1
# include "UriFile.c"
# undef URI_PASS_UNICODE
#else
# ifdef URI_PASS_ANSI
# include "UriDefsAnsi.h"
# else
# include "UriDefsUnicode.h"
# include <wchar.h>
# endif
#ifndef URI_DOXYGEN
# include "Uri.h"
#endif
/*
 * Converts a filename into a URI string.
 *
 * filename   zero-terminated input path
 * uriString  destination buffer; no length is passed in, so the caller
 *            is responsible for providing enough room
 * fromUnix   true: '/' is the separator and a leading '/' marks an
 *            absolute path (prefix "file://");
 *            false: '\\' is the separator (written as '/') and a ':' in
 *            second position marks an absolute path (prefix "file:///")
 *
 * Every path segment is percent-escaped via EscapeEx, except the first
 * segment of an absolute Windows path (the drive, e.g. "C:").
 *
 * Returns URI_SUCCESS, or URI_ERROR_NULL if either pointer is NULL.
 */
static URI_INLINE int URI_FUNC(FilenameToUriString)(const URI_CHAR * filename,
		URI_CHAR * uriString, UriBool fromUnix) {
	const URI_CHAR * input;
	/* First char of the segment not yet written to the output.      */
	/* Tracking the segment start (rather than "last separator") avoids */
	/* forming a pointer in front of the buffer for the first segment. */
	const URI_CHAR * segmentStart;
	URI_CHAR * output;
	UriBool firstSegment = URI_TRUE;
	UriBool absolute;

	/* Validate before doing any pointer arithmetic on the arguments */
	if ((filename == NULL) || (uriString == NULL)) {
		return URI_ERROR_NULL;
	}

	input = filename;
	segmentStart = filename;
	output = uriString;
	absolute = (fromUnix && (filename[0] == _UT('/')))
			|| (!fromUnix && (filename[0] != _UT('\0')) && (filename[1] == _UT(':')));

	if (absolute) {
		const URI_CHAR * const prefix = fromUnix ? _UT("file://") : _UT("file:///");
		const int prefixLen = fromUnix ? 7 : 8;

		/* Copy prefix */
		memcpy(uriString, prefix, prefixLen * sizeof(URI_CHAR));
		output += prefixLen;
	}

	/* Copy and escape on the fly */
	for (;;) {
		if ((input[0] == _UT('\0'))
				|| (fromUnix && input[0] == _UT('/'))
				|| (!fromUnix && input[0] == _UT('\\'))) {
			/* Copy text after last separator */
			if (segmentStart < input) {
				if (!fromUnix && absolute && (firstSegment == URI_TRUE)) {
					/* Quick hack to not convert "C:" to "C%3A" */
					const int charsToCopy = (int)(input - segmentStart);
					memcpy(output, segmentStart, charsToCopy * sizeof(URI_CHAR));
					output += charsToCopy;
				} else {
					output = URI_FUNC(EscapeEx)(segmentStart, input, output,
							URI_FALSE, URI_FALSE);
				}
			}
			firstSegment = URI_FALSE;
		}

		if (input[0] == _UT('\0')) {
			output[0] = _UT('\0');
			break;
		} else if (fromUnix && (input[0] == _UT('/'))) {
			/* Copy separators unmodified */
			output[0] = _UT('/');
			output++;
			/* input is not the terminator here, so input + 1 is in bounds */
			segmentStart = input + 1;
		} else if (!fromUnix && (input[0] == _UT('\\'))) {
			/* Convert backslashes to forward slashes */
			output[0] = _UT('/');
			output++;
			segmentStart = input + 1;
		}
		input++;
	}

	return URI_SUCCESS;
}
/*
 * Converts a URI string back into a filename.
 *
 * uriString  zero-terminated input URI
 * filename   destination buffer; no length is passed in, so the caller
 *            is responsible for providing enough room
 * toUnix     true: strip a leading "file://" and keep forward slashes;
 *            false: strip a leading "file:///" and turn every '/' into '\\'
 *
 * The remainder is copied to filename and percent-decoded in place.
 *
 * Returns URI_SUCCESS, or URI_ERROR_NULL if either pointer is NULL
 * (mirrors the checks done by FilenameToUriString).
 */
static URI_INLINE int URI_FUNC(UriStringToFilename)(const URI_CHAR * uriString,
		URI_CHAR * filename, UriBool toUnix) {
	const URI_CHAR * const prefix = toUnix ? _UT("file://") : _UT("file:///");
	const int prefixLen = toUnix ? 7 : 8;
	URI_CHAR * walker = filename;
	size_t charsToCopy;
	UriBool absolute;
	int charsToSkip;

	/* Both pointers are dereferenced below, so reject NULL up front */
	if ((uriString == NULL) || (filename == NULL)) {
		return URI_ERROR_NULL;
	}

	absolute = (URI_STRNCMP(uriString, prefix, prefixLen) == 0);
	charsToSkip = (absolute ? prefixLen : 0);

	/* Copy everything behind the prefix, terminator included */
	charsToCopy = URI_STRLEN(uriString + charsToSkip) + 1;
	memcpy(filename, uriString + charsToSkip, charsToCopy * sizeof(URI_CHAR));
	URI_FUNC(UnescapeInPlaceEx)(filename, URI_FALSE, URI_BR_DONT_TOUCH);

	/* Convert forward slashes to backslashes */
	if (!toUnix) {
		while (walker[0] != _UT('\0')) {
			if (walker[0] == _UT('/')) {
				walker[0] = _UT('\\');
			}
			walker++;
		}
	}

	return URI_SUCCESS;
}
/* Public entry point: filename to URI string using Unix path rules. */
int URI_FUNC(UnixFilenameToUriString)(const URI_CHAR * filename, URI_CHAR * uriString) {
	const UriBool fromUnix = URI_TRUE;

	return URI_FUNC(FilenameToUriString)(filename, uriString, fromUnix);
}
/* Public entry point: filename to URI string using Windows path rules. */
int URI_FUNC(WindowsFilenameToUriString)(const URI_CHAR * filename, URI_CHAR * uriString) {
	const UriBool fromUnix = URI_FALSE;

	return URI_FUNC(FilenameToUriString)(filename, uriString, fromUnix);
}
/* Public entry point: URI string to filename using Unix path rules. */
int URI_FUNC(UriStringToUnixFilename)(const URI_CHAR * uriString, URI_CHAR * filename) {
	const UriBool toUnix = URI_TRUE;

	return URI_FUNC(UriStringToFilename)(uriString, filename, toUnix);
}
/* Public entry point: URI string to filename using Windows path rules. */
int URI_FUNC(UriStringToWindowsFilename)(const URI_CHAR * uriString, URI_CHAR * filename) {
	const UriBool toUnix = URI_FALSE;

	return URI_FUNC(UriStringToFilename)(uriString, filename, toUnix);
}
#endif

View File

@ -0,0 +1,325 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
* Copyright (C) 2007, Sebastian Pipping <webmaster@hartwork.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the <ORGANIZATION> nor the names of its
* contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* @file UriIp4.c
* Holds the IPv4 parser implementation.
* NOTE: This source file includes itself twice.
*/
/* What encodings are enabled? */
#include "UriDefsConfig.h"
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
# define URI_PASS_ANSI 1
# include "UriIp4.c"
# undef URI_PASS_ANSI
# define URI_PASS_UNICODE 1
# include "UriIp4.c"
# undef URI_PASS_UNICODE
#else
# ifdef URI_PASS_ANSI
# include "UriDefsAnsi.h"
# else
# include "UriDefsUnicode.h"
# endif
#ifndef URI_DOXYGEN
# include "UriIp4.h"
# include "UriIp4Base.h"
# include "UriBase.h"
#endif
/* Prototypes */
/* Forward declarations of the file-local decimal-octet parsers, so */
/* that they can call each other regardless of definition order.    */
/* Each takes the parser state plus the range [first, afterLast).   */
static const URI_CHAR * URI_FUNC(ParseDecOctet)(UriIp4Parser * parser,
const URI_CHAR * first, const URI_CHAR * afterLast);
static const URI_CHAR * URI_FUNC(ParseDecOctetOne)(UriIp4Parser * parser,
const URI_CHAR * first, const URI_CHAR * afterLast);
static const URI_CHAR * URI_FUNC(ParseDecOctetTwo)(UriIp4Parser * parser,
const URI_CHAR * first, const URI_CHAR * afterLast);
static const URI_CHAR * URI_FUNC(ParseDecOctetThree)(UriIp4Parser * parser,
const URI_CHAR * first, const URI_CHAR * afterLast);
static const URI_CHAR * URI_FUNC(ParseDecOctetFour)(UriIp4Parser * parser,
const URI_CHAR * first, const URI_CHAR * afterLast);
/*
 * [ipFourAddress]->[decOctet]<.>[decOctet]<.>[decOctet]<.>[decOctet]
 *
 * Parses the text in [first, afterLast) as a dotted IPv4 address and
 * stores the four octets in octetOutput[0..3]. Octets that were parsed
 * before a syntax error is detected have already been written.
 * Returns URI_SUCCESS or URI_ERROR_SYNTAX.
 */
int URI_FUNC(ParseIpFourAddress)(unsigned char * octetOutput,
		const URI_CHAR * first, const URI_CHAR * afterLast) {
	const URI_CHAR * cursor = first;
	const URI_CHAR * stop;
	UriIp4Parser parser;
	int index;

	/* Essential checks */
	if ((octetOutput == NULL) || (first == NULL)
			|| (afterLast <= first)) {
		return URI_ERROR_SYNTAX;
	}

	/* Reset parser */
	parser.stackCount = 0;

	/* Octets #1 to #3: each one has to be followed by a dot */
	for (index = 0; index < 3; index++) {
		stop = URI_FUNC(ParseDecOctet)(&parser, cursor, afterLast);
		if ((stop == NULL) || (stop >= afterLast) || (*stop != _UT('.'))) {
			return URI_ERROR_SYNTAX;
		}
		uriStackToOctet(&parser, octetOutput + index);

		/* Continue right behind the dot */
		cursor = stop + 1;
	}

	/* Octet #4: has to end exactly where the input ends */
	stop = URI_FUNC(ParseDecOctet)(&parser, cursor, afterLast);
	if (stop != afterLast) {
		return URI_ERROR_SYNTAX;
	}
	uriStackToOctet(&parser, octetOutput + 3);

	return URI_SUCCESS;
}
/*
 * [decOctet]-><0>
 * [decOctet]-><1>[decOctetOne]
 * [decOctet]-><2>[decOctetTwo]
 * [decOctet]-><3>[decOctetThree]
 * [decOctet]-><4>[decOctetThree]
 * [decOctet]-><5>[decOctetThree]
 * [decOctet]-><6>[decOctetThree]
 * [decOctet]-><7>[decOctetThree]
 * [decOctet]-><8>[decOctetThree]
 * [decOctet]-><9>[decOctetThree]
 *
 * Entry rule: dispatches on the leading digit. Returns the position
 * behind the consumed digits, or NULL if the range is empty or does
 * not start with a digit.
 */
static URI_INLINE const URI_CHAR * URI_FUNC(ParseDecOctet)(UriIp4Parser * parser,
		const URI_CHAR * first, const URI_CHAR * afterLast) {
	URI_CHAR lead;

	if (first >= afterLast) {
		return NULL;
	}

	lead = *first;

	/* A leading zero stands alone */
	if (lead == _UT('0')) {
		uriPushToStack(parser, 0);
		return first + 1;
	}

	/* 1xx: up to two more arbitrary digits */
	if (lead == _UT('1')) {
		uriPushToStack(parser, 1);
		return (const URI_CHAR *)URI_FUNC(ParseDecOctetOne)(parser, first + 1, afterLast);
	}

	/* 2xx: what may follow is restricted by the dedicated rule */
	if (lead == _UT('2')) {
		uriPushToStack(parser, 2);
		return (const URI_CHAR *)URI_FUNC(ParseDecOctetTwo)(parser, first + 1, afterLast);
	}

	/* 3..9: at most one more digit */
	if ((lead >= _UT('3')) && (lead <= _UT('9'))) {
		uriPushToStack(parser, (unsigned char)(9 + lead - _UT('9')));
		return (const URI_CHAR *)URI_FUNC(ParseDecOctetThree)(parser, first + 1, afterLast);
	}

	return NULL;
}
/*
 * [decOctetOne]-><NULL>
 * [decOctetOne]->[DIGIT][decOctetThree]
 *
 * Continuation after a leading '1'. Returns afterLast if the range is
 * exhausted, first if no digit follows, otherwise the position behind
 * the consumed digits.
 */
static URI_INLINE const URI_CHAR * URI_FUNC(ParseDecOctetOne)(UriIp4Parser * parser,
		const URI_CHAR * first, const URI_CHAR * afterLast) {
	if (first >= afterLast) {
		return afterLast;
	}

	/* Not a digit: the octet ended with the previous char */
	if ((*first < _UT('0')) || (*first > _UT('9'))) {
		return first;
	}

	uriPushToStack(parser, (unsigned char)(9 + *first - _UT('9')));
	return (const URI_CHAR *)URI_FUNC(ParseDecOctetThree)(parser, first + 1, afterLast);
}
/*
 * [decOctetTwo]-><NULL>
 * [decOctetTwo]-><0>[decOctetThree]
 * [decOctetTwo]-><1>[decOctetThree]
 * [decOctetTwo]-><2>[decOctetThree]
 * [decOctetTwo]-><3>[decOctetThree]
 * [decOctetTwo]-><4>[decOctetThree]
 * [decOctetTwo]-><5>[decOctetFour]
 * [decOctetTwo]-><6>
 * [decOctetTwo]-><7>
 * [decOctetTwo]-><8>
 * [decOctetTwo]-><9>
 *
 * Continuation after a leading '2'. Returns afterLast if the range is
 * exhausted, first if no digit follows, otherwise the position behind
 * the consumed digits.
 */
static URI_INLINE const URI_CHAR * URI_FUNC(ParseDecOctetTwo)(UriIp4Parser * parser,
		const URI_CHAR * first, const URI_CHAR * afterLast) {
	URI_CHAR second;

	if (first >= afterLast) {
		return afterLast;
	}

	second = *first;

	/* 20..24: any third digit is fine */
	if ((second >= _UT('0')) && (second <= _UT('4'))) {
		uriPushToStack(parser, (unsigned char)(9 + second - _UT('9')));
		return (const URI_CHAR *)URI_FUNC(ParseDecOctetThree)(parser, first + 1, afterLast);
	}

	/* 25: third digit limited to 0..5 */
	if (second == _UT('5')) {
		uriPushToStack(parser, 5);
		return (const URI_CHAR *)URI_FUNC(ParseDecOctetFour)(parser, first + 1, afterLast);
	}

	/* 26..29: two digits only */
	if ((second >= _UT('6')) && (second <= _UT('9'))) {
		uriPushToStack(parser, (unsigned char)(9 + second - _UT('9')));
		return first + 1;
	}

	return first;
}
/*
 * [decOctetThree]-><NULL>
 * [decOctetThree]->[DIGIT]
 *
 * Consumes one optional trailing digit. Returns afterLast if the range
 * is exhausted, first if no digit follows, otherwise first + 1.
 */
static URI_INLINE const URI_CHAR * URI_FUNC(ParseDecOctetThree)(UriIp4Parser * parser,
		const URI_CHAR * first, const URI_CHAR * afterLast) {
	if (first >= afterLast) {
		return afterLast;
	}

	if ((*first >= _UT('0')) && (*first <= _UT('9'))) {
		uriPushToStack(parser, (unsigned char)(9 + *first - _UT('9')));
		return first + 1;
	}

	return first;
}
/*
 * [decOctetFour]-><NULL>
 * [decOctetFour]-><0>
 * [decOctetFour]-><1>
 * [decOctetFour]-><2>
 * [decOctetFour]-><3>
 * [decOctetFour]-><4>
 * [decOctetFour]-><5>
 *
 * Consumes one optional trailing digit in the range 0..5 (the last
 * digit of 250..255). Returns afterLast if the range is exhausted,
 * first if no such digit follows, otherwise first + 1.
 */
static URI_INLINE const URI_CHAR * URI_FUNC(ParseDecOctetFour)(UriIp4Parser * parser,
		const URI_CHAR * first, const URI_CHAR * afterLast) {
	if (first >= afterLast) {
		return afterLast;
	}

	if ((*first >= _UT('0')) && (*first <= _UT('5'))) {
		uriPushToStack(parser, (unsigned char)(9 + *first - _UT('9')));
		return first + 1;
	}

	return first;
}
#endif

View File

@ -0,0 +1,87 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
* Copyright (C) 2007, Sebastian Pipping <webmaster@hartwork.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the <ORGANIZATION> nor the names of its
* contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
 * @file UriIp4.h
 * Holds the IPv4 parser interface.
 * NOTE: This header includes itself twice.
 */
/* Outer guard: let the content through when this is the first visit   */
/* for the active pass (ANSI or Unicode), or when no pass is active    */
/* yet (plain inclusion by user code).                                 */
#if (defined(URI_PASS_ANSI) && !defined(URI_IP4_TWICE_H_ANSI)) \
|| (defined(URI_PASS_UNICODE) && !defined(URI_IP4_TWICE_H_UNICODE)) \
|| (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* What encodings are enabled? */
#include "UriDefsConfig.h"
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
/* Once per encoding, with the matching pass macro set for the duration */
# define URI_PASS_ANSI 1
# include "UriIp4.h"
# undef URI_PASS_ANSI
# define URI_PASS_UNICODE 1
# include "UriIp4.h"
# undef URI_PASS_UNICODE
/* Only one pass for each encoding */
/* (and only if that encoding is enabled in UriDefsConfig.h) */
#elif (defined(URI_PASS_ANSI) && !defined(URI_IP4_TWICE_H_ANSI) \
&& defined(URI_ENABLE_ANSI)) || (defined(URI_PASS_UNICODE) \
&& !defined(URI_IP4_TWICE_H_UNICODE) && defined(URI_ENABLE_UNICODE))
# ifdef URI_PASS_ANSI
/* Mark the ANSI pass as done and bind the macros to char */
# define URI_IP4_TWICE_H_ANSI 1
# include "UriDefsAnsi.h"
# else
/* Mark the Unicode pass as done and bind the macros to wchar_t */
# define URI_IP4_TWICE_H_UNICODE 1
# include "UriDefsUnicode.h"
# endif
/**
 * Converts an IPv4 text representation into four bytes.
 *
 * @param octetOutput Output destination
 * @param first First character of IPv4 text to parse
 * @param afterLast Position to stop parsing at
 * @return Error code or 0 on success
 */
int URI_FUNC(ParseIpFourAddress)(unsigned char * octetOutput,
const URI_CHAR * first, const URI_CHAR * afterLast);
#endif
#endif

View File

@ -0,0 +1,96 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
* Copyright (C) 2007, Sebastian Pipping <webmaster@hartwork.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the <ORGANIZATION> nor the names of its
* contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* @file UriIp4Base.c
* Holds code independent of the encoding pass.
*/
#ifndef URI_DOXYGEN
# include "UriIp4Base.h"
#endif
/**
 * Folds the decimal digits currently held by the parser into one octet
 * value and empties the digit stack.
 *
 * The first pushed digit is the most significant one. When no digit is
 * held (or the count is not 1, 2 or 3), <c>*octet</c> is left untouched.
 *
 * @param parser  Digit stack to read and reset
 * @param octet   Receives the combined value
 */
void uriStackToOctet(UriIp4Parser * parser, unsigned char * octet) {
	const unsigned char digitsHeld = parser->stackCount;

	if (digitsHeld == 1) {
		*octet = parser->stackOne;
	} else if (digitsHeld == 2) {
		*octet = (unsigned char)(parser->stackOne * 10
				+ parser->stackTwo);
	} else if (digitsHeld == 3) {
		*octet = (unsigned char)(parser->stackOne * 100
				+ parser->stackTwo * 10
				+ parser->stackThree);
	}

	/* The stack is always emptied, whether or not a value was written */
	parser->stackCount = 0;
}
/**
 * Stores one more decimal digit in the parser's digit stack.
 *
 * At most three digits are kept; a push onto a full stack is ignored.
 *
 * @param parser  Digit stack to append to
 * @param digit   Digit value to store
 */
void uriPushToStack(UriIp4Parser * parser, unsigned char digit) {
	if (parser->stackCount == 0) {
		parser->stackOne = digit;
		parser->stackCount = 1;
	} else if (parser->stackCount == 1) {
		parser->stackTwo = digit;
		parser->stackCount = 2;
	} else if (parser->stackCount == 2) {
		parser->stackThree = digit;
		parser->stackCount = 3;
	}
	/* Any other count: nothing is stored */
}

View File

@ -0,0 +1,59 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
* Copyright (C) 2007, Sebastian Pipping <webmaster@hartwork.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the <ORGANIZATION> nor the names of its
* contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef URI_IP4_BASE_H
#define URI_IP4_BASE_H 1
/**
 * Small digit stack used while reading one decimal octet of an IPv4
 * address: uriPushToStack() stores up to three digits and
 * uriStackToOctet() combines them into a single value.
 */
typedef struct UriIp4ParserStruct {
/* Number of digits currently stored (0 to 3) */
unsigned char stackCount;
/* First digit pushed; most significant when combined */
unsigned char stackOne;
/* Second digit pushed */
unsigned char stackTwo;
/* Third digit pushed; least significant when three digits are held */
unsigned char stackThree;
} UriIp4Parser;
/* Stores a digit in the next free slot of the stack; ignored when three
 * digits are already held. */
void uriPushToStack(UriIp4Parser * parser, unsigned char digit);
/* Combines the stored digits (first pushed = most significant) into *octet
 * and resets the stack; *octet is not written when the stack is empty. */
void uriStackToOctet(UriIp4Parser * parser, unsigned char * octet);
#endif /* URI_IP4_BASE_H */

View File

@ -0,0 +1,722 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
* Copyright (C) 2007, Sebastian Pipping <webmaster@hartwork.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the <ORGANIZATION> nor the names of its
* contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* @file UriNormalize.c
* Holds the RFC 3986 %URI normalization implementation.
* NOTE: This source file includes itself twice.
*/
/* What encodings are enabled? */
#include "UriDefsConfig.h"
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
# define URI_PASS_ANSI 1
# include "UriNormalize.c"
# undef URI_PASS_ANSI
# define URI_PASS_UNICODE 1
# include "UriNormalize.c"
# undef URI_PASS_UNICODE
#else
# ifdef URI_PASS_ANSI
# include "UriDefsAnsi.h"
# else
# include "UriDefsUnicode.h"
# include <wchar.h>
# endif
#ifndef URI_DOXYGEN
# include "Uri.h"
# include "UriNormalizeBase.h"
# include "UriCommon.h"
#endif
/*
 * Forward declarations of the file-local helpers defined below.
 * Each is instantiated once per encoding pass through URI_FUNC().
 */

/* Shared worker behind the public normalization entry points */
static int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri, unsigned int inMask,
unsigned int * outMask);
/* Ownership helpers: duplicate text ranges so the URI owns its memory */
static UriBool URI_FUNC(MakeRangeOwner)(unsigned int * doneMask,
unsigned int maskTest, URI_TYPE(TextRange) * range);
static UriBool URI_FUNC(MakeOwner)(URI_TYPE(Uri) * uri,
unsigned int * doneMask);
/* Percent-encoding normalization, in place or into a fresh buffer */
static void URI_FUNC(FixPercentEncodingInplace)(const URI_CHAR * first,
const URI_CHAR ** afterLast);
static UriBool URI_FUNC(FixPercentEncodingMalloc)(const URI_CHAR ** first,
const URI_CHAR ** afterLast);
static void URI_FUNC(FixPercentEncodingEngine)(
const URI_CHAR * inFirst, const URI_CHAR * inAfterLast,
const URI_CHAR * outFirst, const URI_CHAR ** outAfterLast);
/* Detection helpers used to decide whether normalization is needed */
static UriBool URI_FUNC(ContainsUppercaseLetters)(const URI_CHAR * first,
const URI_CHAR * afterLast);
static UriBool URI_FUNC(ContainsUglyPercentEncoding)(const URI_CHAR * first,
const URI_CHAR * afterLast);
/* Case normalization, in place or into a fresh buffer */
static void URI_FUNC(LowercaseInplace)(const URI_CHAR * first,
const URI_CHAR * afterLast);
static UriBool URI_FUNC(LowercaseMalloc)(const URI_CHAR ** first,
const URI_CHAR ** afterLast);
/* Error-path cleanup of buffers allocated during normalization */
static void URI_FUNC(PreventLeakage)(URI_TYPE(Uri) * uri,
unsigned int revertMask);
/*
 * Error-path cleanup: for every component whose bit is set in revertMask,
 * frees the buffer the URI currently points to for that component and
 * resets the corresponding pointers to NULL.
 *
 * For the path, the whole segment list (segment structs plus any
 * non-empty segment text) is released.
 */
static URI_INLINE void URI_FUNC(PreventLeakage)(URI_TYPE(Uri) * uri,
		unsigned int revertMask) {
	if ((revertMask & URI_NORMALIZE_SCHEME) != 0) {
		free((URI_CHAR *)uri->scheme.first);
		uri->scheme.afterLast = NULL;
		uri->scheme.first = NULL;
	}

	if ((revertMask & URI_NORMALIZE_USER_INFO) != 0) {
		free((URI_CHAR *)uri->userInfo.first);
		uri->userInfo.afterLast = NULL;
		uri->userInfo.first = NULL;
	}

	if ((revertMask & URI_NORMALIZE_HOST) != 0) {
		if (uri->hostData.ipFuture.first == NULL) {
			/* Not IPvFuture: only a registered name has host text to free */
			if ((uri->hostText.first != NULL)
					&& (uri->hostData.ip4 == NULL)
					&& (uri->hostData.ip6 == NULL)) {
				free((URI_CHAR *)uri->hostText.first);
				uri->hostText.afterLast = NULL;
				uri->hostText.first = NULL;
			}
		} else {
			/* IPvFuture: the buffer is reached through hostData.ipFuture;
			 * hostText is reset along with it */
			free((URI_CHAR *)uri->hostData.ipFuture.first);
			uri->hostData.ipFuture.afterLast = NULL;
			uri->hostData.ipFuture.first = NULL;
			uri->hostText.afterLast = NULL;
			uri->hostText.first = NULL;
		}
	}

	/* NOTE: Port cannot happen! */

	if ((revertMask & URI_NORMALIZE_PATH) != 0) {
		URI_TYPE(PathSegment) * segment = uri->pathHead;
		URI_TYPE(PathSegment) * following;

		for (; segment != NULL; segment = following) {
			/* Remember the successor before the node is released */
			following = segment->next;
			if (segment->text.first < segment->text.afterLast) {
				free((URI_CHAR *)segment->text.first);
			}
			free(segment);
		}
		uri->pathTail = NULL;
		uri->pathHead = NULL;
	}

	if ((revertMask & URI_NORMALIZE_QUERY) != 0) {
		free((URI_CHAR *)uri->query.first);
		uri->query.afterLast = NULL;
		uri->query.first = NULL;
	}

	if ((revertMask & URI_NORMALIZE_FRAGMENT) != 0) {
		free((URI_CHAR *)uri->fragment.first);
		uri->fragment.afterLast = NULL;
		uri->fragment.first = NULL;
	}
}
/*
 * Reports whether the range [first, afterLast) holds at least one
 * character between 'A' and 'Z' (RFC 3986, 6.2.2.1 Case Normalization
 * for scheme and host). NULL or empty ranges yield URI_FALSE.
 */
static URI_INLINE UriBool URI_FUNC(ContainsUppercaseLetters)(const URI_CHAR * first,
		const URI_CHAR * afterLast) {
	const URI_CHAR * cursor;

	if ((first == NULL) || (afterLast == NULL) || (afterLast <= first)) {
		return URI_FALSE;
	}

	for (cursor = first; cursor < afterLast; cursor++) {
		const URI_CHAR current = *cursor;
		if ((current >= _UT('A')) && (current <= _UT('Z'))) {
			return URI_TRUE;
		}
	}
	return URI_FALSE;
}
/*
 * Reports whether the range [first, afterLast) contains a percent group
 * that normalization would rewrite: either one written with a lowercase
 * hex digit 'a'..'f' (6.2.2.1 Case Normalization) or one that encodes an
 * unreserved character (6.2.2.2 Percent-Encoding Normalization).
 * NULL or empty ranges yield URI_FALSE.
 */
static URI_INLINE UriBool URI_FUNC(ContainsUglyPercentEncoding)(const URI_CHAR * first,
		const URI_CHAR * afterLast) {
	const URI_CHAR * cursor;

	if ((first == NULL) || (afterLast == NULL) || (afterLast <= first)) {
		return URI_FALSE;
	}

	/* Only positions followed by two more characters can start a group */
	for (cursor = first; cursor + 2 < afterLast; cursor++) {
		URI_CHAR high;
		URI_CHAR low;
		unsigned char left;
		unsigned char right;
		int code;

		if (cursor[0] != _UT('%')) {
			continue;
		}
		high = cursor[1];
		low = cursor[2];

		/* Lowercase hex digit in either position */
		if (((high >= _UT('a')) && (high <= _UT('f')))
				|| ((low >= _UT('a')) && (low <= _UT('f')))) {
			return URI_TRUE;
		}

		/* Needlessly encoded unreserved character */
		left = URI_FUNC(HexdigToInt)(high);
		right = URI_FUNC(HexdigToInt)(low);
		code = 16 * left + right;
		if (uriIsUnreserved(code)) {
			return URI_TRUE;
		}
	}
	return URI_FALSE;
}
/*
 * Converts every character between 'A' and 'Z' in [first, afterLast) to
 * its lowercase counterpart, writing through the (const-cast) pointer.
 * NULL or empty ranges are left alone.
 */
static URI_INLINE void URI_FUNC(LowercaseInplace)(const URI_CHAR * first,
		const URI_CHAR * afterLast) {
	const int caseOffset = (_UT('a') - _UT('A'));
	URI_CHAR * cursor;

	if ((first == NULL) || (afterLast == NULL) || (afterLast <= first)) {
		return;
	}

	cursor = (URI_CHAR *)first;
	while (cursor < afterLast) {
		if ((*cursor >= _UT('A')) && (*cursor <= _UT('Z'))) {
			*cursor = (URI_CHAR)(*cursor + caseOffset);
		}
		cursor++;
	}
}
/*
 * Replaces the range [*first, *afterLast) with a newly malloc'ed copy in
 * which characters between 'A' and 'Z' are lowercased.
 *
 * Returns URI_TRUE on success; an empty range succeeds without
 * allocating and leaves both pointers unchanged. Returns URI_FALSE for
 * NULL arguments, a negative length, or when malloc fails.
 */
static URI_INLINE UriBool URI_FUNC(LowercaseMalloc)(const URI_CHAR ** first,
		const URI_CHAR ** afterLast) {
	const int caseOffset = (_UT('a') - _UT('A'));
	const URI_CHAR * source;
	URI_CHAR * copy;
	int count;
	int pos;

	if ((first == NULL) || (afterLast == NULL)) {
		return URI_FALSE;
	}
	if ((*first == NULL) || (*afterLast == NULL)) {
		return URI_FALSE;
	}

	count = (int)(*afterLast - *first);
	if (count < 0) {
		return URI_FALSE;
	}
	if (count == 0) {
		return URI_TRUE;
	}

	copy = malloc(count * sizeof(URI_CHAR));
	if (copy == NULL) {
		return URI_FALSE;
	}

	source = *first;
	for (pos = 0; pos < count; pos++) {
		const URI_CHAR current = source[pos];
		if ((current >= _UT('A')) && (current <= _UT('Z'))) {
			copy[pos] = (URI_CHAR)(current + caseOffset);
		} else {
			copy[pos] = current;
		}
	}

	/* Hand the new buffer back to the caller */
	*first = copy;
	*afterLast = copy + count;
	return URI_TRUE;
}
/* NOTE: Implementation must stay inplace-compatible */
static URI_INLINE void URI_FUNC(FixPercentEncodingEngine)(
const URI_CHAR * inFirst, const URI_CHAR * inAfterLast,
const URI_CHAR * outFirst, const URI_CHAR ** outAfterLast) {
URI_CHAR * write = (URI_CHAR *)outFirst;
const int lenInChars = (int)(inAfterLast - inFirst);
int i = 0;
/* All but last two */
for (; i + 2 < lenInChars; i++) {
if (inFirst[i] != _UT('%')) {
write[0] = inFirst[i];
write++;
} else {
/* 6.2.2.2 Percent-Encoding Normalization: *
* percent-encoded unreserved characters */
const URI_CHAR one = inFirst[i + 1];
const URI_CHAR two = inFirst[i + 2];
const unsigned char left = URI_FUNC(HexdigToInt)(one);
const unsigned char right = URI_FUNC(HexdigToInt)(two);
const int code = 16 * left + right;
if (uriIsUnreserved(code)) {
write[0] = (URI_CHAR)(code);
write++;
} else {
/* 6.2.2.1 Case Normalization: *
* lowercase percent-encodings */
write[0] = _UT('%');
write[1] = URI_FUNC(HexToLetter)(left);
write[2] = URI_FUNC(HexToLetter)(right);
write += 3;
}
i += 2; /* For the two chars of the percent group we just ate */
}
}
/* Last two */
for (; i < lenInChars; i++) {
write[0] = inFirst[i];
write++;
}
*outAfterLast = write;
}
/*
 * Normalizes the percent-encoding of [first, *afterLast) in place and
 * moves *afterLast to the new end of the text. Does nothing when any of
 * first, afterLast or *afterLast is NULL.
 */
static URI_INLINE void URI_FUNC(FixPercentEncodingInplace)(const URI_CHAR * first,
		const URI_CHAR ** afterLast) {
	if ((first != NULL) && (afterLast != NULL) && (*afterLast != NULL)) {
		/* Input and output share the same buffer */
		URI_FUNC(FixPercentEncodingEngine)(first, *afterLast, first, afterLast);
	}
}
/*
 * Replaces [*first, *afterLast) with a newly malloc'ed copy whose
 * percent-encoding has been normalized.
 *
 * Returns URI_TRUE on success; an empty range succeeds without
 * allocating and leaves both pointers unchanged. Returns URI_FALSE for
 * NULL arguments, a negative length, or when malloc fails.
 */
static URI_INLINE UriBool URI_FUNC(FixPercentEncodingMalloc)(const URI_CHAR ** first,
		const URI_CHAR ** afterLast) {
	URI_CHAR * copy;
	int oldLength;

	if ((first == NULL) || (afterLast == NULL)) {
		return URI_FALSE;
	}
	if ((*first == NULL) || (*afterLast == NULL)) {
		return URI_FALSE;
	}

	oldLength = (int)(*afterLast - *first);
	if (oldLength < 0) {
		return URI_FALSE;
	}
	if (oldLength == 0) {
		return URI_TRUE;
	}

	/* The buffer is sized to the old text length */
	copy = malloc(oldLength * sizeof(URI_CHAR));
	if (copy == NULL) {
		return URI_FALSE;
	}

	/* The engine stores the new end in *afterLast; *first follows */
	URI_FUNC(FixPercentEncodingEngine)(*first, *afterLast, copy, afterLast);
	*first = copy;
	return URI_TRUE;
}
/*
 * Duplicates a non-empty text range into malloc'ed memory unless the bit
 * maskTest is already set in *doneMask. After a successful copy the range
 * points at the duplicate and maskTest is OR-ed into *doneMask.
 *
 * Passing maskTest == 0 therefore always duplicates a non-empty range.
 * NULL or empty ranges are left untouched and count as success.
 *
 * Returns URI_FALSE only when malloc fails.
 */
static URI_INLINE UriBool URI_FUNC(MakeRangeOwner)(unsigned int * doneMask,
		unsigned int maskTest, URI_TYPE(TextRange) * range) {
	URI_CHAR * copy;
	int lenInChars;
	int lenInBytes;

	if ((*doneMask & maskTest) != 0) {
		return URI_TRUE; /* Already handled earlier */
	}
	if ((range->first == NULL)
			|| (range->afterLast == NULL)
			|| (range->afterLast <= range->first)) {
		return URI_TRUE; /* Nothing to duplicate */
	}

	lenInChars = (int)(range->afterLast - range->first);
	lenInBytes = lenInChars * sizeof(URI_CHAR);
	copy = malloc(lenInBytes);
	if (copy == NULL) {
		return URI_FALSE; /* Raises malloc error */
	}

	memcpy(copy, range->first, lenInBytes);
	range->first = copy;
	range->afterLast = copy + lenInChars;
	*doneMask |= maskTest;
	return URI_TRUE;
}
/*
 * Duplicates every text range of the URI that has not been duplicated
 * yet (as recorded in *doneMask), so that afterwards all text the URI
 * points to lives in memory obtained from malloc.
 *
 * Components are processed in this order: scheme, user info, query,
 * fragment, host (IPvFuture or registered name only), path segments,
 * port text. Bits for the components duplicated here are OR-ed into
 * *doneMask so the caller can undo them via PreventLeakage on failure.
 *
 * If duplicating a path segment fails, the whole path is destroyed here
 * (segment structs and the segment texts duplicated so far) and
 * pathHead/pathTail are reset to NULL; URI_NORMALIZE_PATH is NOT set in
 * that case, so the caller will not try to free the path again.
 *
 * Returns URI_TRUE on success, URI_FALSE when a malloc failed.
 */
static URI_INLINE UriBool URI_FUNC(MakeOwner)(URI_TYPE(Uri) * uri,
		unsigned int * doneMask) {
	URI_TYPE(PathSegment) * walker = uri->pathHead;

	if (!URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_SCHEME,
				&(uri->scheme))
			|| !URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_USER_INFO,
				&(uri->userInfo))
			|| !URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_QUERY,
				&(uri->query))
			|| !URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_FRAGMENT,
				&(uri->fragment))) {
		return URI_FALSE; /* Raises malloc error */
	}

	/* Host */
	if ((*doneMask & URI_NORMALIZE_HOST) == 0) {
		if ((uri->hostData.ip4 == NULL)
				&& (uri->hostData.ip6 == NULL)) {
			if (uri->hostData.ipFuture.first != NULL) {
				/* IPvFuture */
				if (!URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_HOST,
						&(uri->hostData.ipFuture))) {
					return URI_FALSE; /* Raises malloc error */
				}
				/* hostText mirrors the (now duplicated) IPvFuture range */
				uri->hostText.first = uri->hostData.ipFuture.first;
				uri->hostText.afterLast = uri->hostData.ipFuture.afterLast;
			} else if (uri->hostText.first != NULL) {
				/* Regname */
				if (!URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_HOST,
						&(uri->hostText))) {
					return URI_FALSE; /* Raises malloc error */
				}
			}
		}
	}

	/* Path */
	if ((*doneMask & URI_NORMALIZE_PATH) == 0) {
		while (walker != NULL) {
			/* Mask 0: always duplicate, the path bit is set once at the end */
			if (!URI_FUNC(MakeRangeOwner)(doneMask, 0, &(walker->text))) {
				/* Free allocations done so far and kill the path */

				/* Segments before walker (if any): their non-empty texts
				 * were duplicated above, so text and struct are both freed.
				 * Comparing against walker itself (not ranger->next) also
				 * covers the case where the very first segment failed and
				 * makes sure the segment right before walker is released. */
				URI_TYPE(PathSegment) * ranger = uri->pathHead;
				while (ranger != walker) {
					URI_TYPE(PathSegment) * const next = ranger->next;
					if ((ranger->text.first != NULL)
							&& (ranger->text.afterLast != NULL)
							&& (ranger->text.afterLast > ranger->text.first)) {
						free((URI_CHAR *)ranger->text.first);
					}
					free(ranger);
					ranger = next;
				}

				/* Segments from walker on: their texts were not duplicated
				 * here, so only the structs are freed. */
				while (walker != NULL) {
					URI_TYPE(PathSegment) * const next = walker->next;
					free(walker);
					walker = next;
				}

				uri->pathHead = NULL;
				uri->pathTail = NULL;
				return URI_FALSE; /* Raises malloc error */
			}
			walker = walker->next;
		}
		*doneMask |= URI_NORMALIZE_PATH;
	}

	/* Port text, must come last so we don't have to undo that one if it fails. *
	 * Otherwise we would need an extra enum flag for it although the port      *
	 * cannot go unnormalized...                                                */
	if (!URI_FUNC(MakeRangeOwner)(doneMask, 0, &(uri->portText))) {
		return URI_FALSE; /* Raises malloc error */
	}

	return URI_TRUE;
}
/*
 * Determines which components of the URI would be changed by syntax
 * normalization, without modifying the URI.
 *
 * Returns a bit mask of URI_NORMALIZE_* flags, or URI_NORMALIZED when
 * nothing needs normalizing or when uri is NULL.
 */
unsigned int URI_FUNC(NormalizeSyntaxMaskRequired)(const URI_TYPE(Uri) * uri) {
	unsigned int res = URI_NORMALIZED;
#if defined(__GNUC__) && ((__GNUC__ > 4) \
		|| ((__GNUC__ == 4) && defined(__GNUC_MINOR__) && (__GNUC_MINOR__ >= 2)))
	/* Slower code that fixes a warning, not sure if this is a smart idea */
	URI_TYPE(Uri) writeableClone;
	if (uri == NULL) {
		/* Copying from a NULL pointer would be undefined behaviour; the
		 * engine reports URI_NORMALIZED for a NULL URI, so do the same. */
		return res;
	}
	memcpy(&writeableClone, uri, 1 * sizeof(URI_TYPE(Uri)));
	URI_FUNC(NormalizeSyntaxEngine)(&writeableClone, 0, &res);
#else
	/* The engine only analyzes when an output mask is supplied, so casting
	 * away const is safe here; it also handles a NULL uri itself. */
	URI_FUNC(NormalizeSyntaxEngine)((URI_TYPE(Uri) *)uri, 0, &res);
#endif
	return res;
}
/*
 * Normalizes the components of the URI selected by mask (a combination
 * of URI_NORMALIZE_* flags). Returns the engine's status code.
 */
int URI_FUNC(NormalizeSyntaxEx)(URI_TYPE(Uri) * uri, unsigned int mask) {
	/* No output mask: the engine modifies the URI instead of analyzing it */
	const int status = URI_FUNC(NormalizeSyntaxEngine)(uri, mask, NULL);
	return status;
}
/*
 * Normalizes all components of the URI by requesting every bit of the
 * normalization mask. Returns the status of NormalizeSyntaxEx.
 */
int URI_FUNC(NormalizeSyntax)(URI_TYPE(Uri) * uri) {
	const unsigned int allComponents = ~0u; /* Every mask bit set */
	return URI_FUNC(NormalizeSyntaxEx)(uri, allComponents);
}
/*
 * Shared worker for syntax normalization. It runs in one of two modes,
 * selected by outMask:
 *
 * - Analysis mode (outMask != NULL): the URI is only inspected. For each
 *   component that normalization would change, the matching
 *   URI_NORMALIZE_* bit is set in *outMask. inMask is not consulted.
 *   A NULL uri yields *outMask == URI_NORMALIZED and URI_SUCCESS.
 *
 * - Normalization mode (outMask == NULL): the components selected by
 *   inMask are normalized. If the URI owns its memory (uri->owner) the
 *   text is edited in place; otherwise normalized copies are allocated
 *   and recorded in doneMask, and at the end MakeOwner duplicates the
 *   remaining components and uri->owner is set to URI_TRUE.
 *   If inMask is URI_NORMALIZED the function returns immediately.
 *
 * Returns URI_SUCCESS, URI_ERROR_NULL (uri and outMask both NULL) or
 * URI_ERROR_MALLOC. On allocation failure PreventLeakage is called with
 * doneMask to release what has been allocated so far.
 */
static URI_INLINE int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri, unsigned int inMask, unsigned int * outMask) {
/* Bits of the components whose text was replaced by a fresh allocation */
unsigned int doneMask = URI_NORMALIZED;
if (uri == NULL) {
if (outMask != NULL) {
*outMask = URI_NORMALIZED;
return URI_SUCCESS;
} else {
return URI_ERROR_NULL;
}
}
if (outMask != NULL) {
/* Reset mask */
*outMask = URI_NORMALIZED;
} else if (inMask == URI_NORMALIZED) {
/* Nothing to do */
return URI_SUCCESS;
}
/* Scheme, host */
if (outMask != NULL) {
const UriBool normalizeScheme = URI_FUNC(ContainsUppercaseLetters)(
uri->scheme.first, uri->scheme.afterLast);
const UriBool normalizeHostCase = URI_FUNC(ContainsUppercaseLetters)(
uri->hostText.first, uri->hostText.afterLast);
if (normalizeScheme) {
*outMask |= URI_NORMALIZE_SCHEME;
}
if (normalizeHostCase) {
*outMask |= URI_NORMALIZE_HOST;
} else {
/* Host case is fine; percent-encoding may still need fixing */
const UriBool normalizeHostPrecent = URI_FUNC(ContainsUglyPercentEncoding)(
uri->hostText.first, uri->hostText.afterLast);
if (normalizeHostPrecent) {
*outMask |= URI_NORMALIZE_HOST;
}
}
} else {
/* Scheme */
if ((inMask & URI_NORMALIZE_SCHEME) && (uri->scheme.first != NULL)) {
if (uri->owner) {
URI_FUNC(LowercaseInplace)(uri->scheme.first, uri->scheme.afterLast);
} else {
if (!URI_FUNC(LowercaseMalloc)(&(uri->scheme.first), &(uri->scheme.afterLast))) {
URI_FUNC(PreventLeakage)(uri, doneMask);
return URI_ERROR_MALLOC;
}
doneMask |= URI_NORMALIZE_SCHEME;
}
}
/* Host */
if (inMask & URI_NORMALIZE_HOST) {
if (uri->hostData.ipFuture.first != NULL) {
/* IPvFuture */
if (uri->owner) {
URI_FUNC(LowercaseInplace)(uri->hostData.ipFuture.first,
uri->hostData.ipFuture.afterLast);
} else {
if (!URI_FUNC(LowercaseMalloc)(&(uri->hostData.ipFuture.first),
&(uri->hostData.ipFuture.afterLast))) {
URI_FUNC(PreventLeakage)(uri, doneMask);
return URI_ERROR_MALLOC;
}
doneMask |= URI_NORMALIZE_HOST;
}
/* Keep hostText in sync with the (possibly reallocated) IPvFuture range */
uri->hostText.first = uri->hostData.ipFuture.first;
uri->hostText.afterLast = uri->hostData.ipFuture.afterLast;
} else if ((uri->hostText.first != NULL)
&& (uri->hostData.ip4 == NULL)
&& (uri->hostData.ip6 == NULL)) {
/* Regname */
if (uri->owner) {
URI_FUNC(FixPercentEncodingInplace)(uri->hostText.first,
&(uri->hostText.afterLast));
} else {
if (!URI_FUNC(FixPercentEncodingMalloc)(
&(uri->hostText.first),
&(uri->hostText.afterLast))) {
URI_FUNC(PreventLeakage)(uri, doneMask);
return URI_ERROR_MALLOC;
}
doneMask |= URI_NORMALIZE_HOST;
}
/* Lowercasing is done in place in both branches, after the
 * percent-encoding step above */
URI_FUNC(LowercaseInplace)(uri->hostText.first,
uri->hostText.afterLast);
}
}
}
/* User info */
if (outMask != NULL) {
const UriBool normalizeUserInfo = URI_FUNC(ContainsUglyPercentEncoding)(
uri->userInfo.first, uri->userInfo.afterLast);
if (normalizeUserInfo) {
*outMask |= URI_NORMALIZE_USER_INFO;
}
} else {
if ((inMask & URI_NORMALIZE_USER_INFO) && (uri->userInfo.first != NULL)) {
if (uri->owner) {
URI_FUNC(FixPercentEncodingInplace)(uri->userInfo.first, &(uri->userInfo.afterLast));
} else {
if (!URI_FUNC(FixPercentEncodingMalloc)(&(uri->userInfo.first),
&(uri->userInfo.afterLast))) {
URI_FUNC(PreventLeakage)(uri, doneMask);
return URI_ERROR_MALLOC;
}
doneMask |= URI_NORMALIZE_USER_INFO;
}
}
}
/* Path */
if (outMask != NULL) {
/* The path needs work as soon as one segment is ".", ".." or
 * contains percent-encoding that would be rewritten */
const URI_TYPE(PathSegment) * walker = uri->pathHead;
while (walker != NULL) {
const URI_CHAR * const first = walker->text.first;
const URI_CHAR * const afterLast = walker->text.afterLast;
if ((first != NULL)
&& (afterLast != NULL)
&& (afterLast > first)
&& (
(((afterLast - first) == 1)
&& (first[0] == _UT('.')))
||
(((afterLast - first) == 2)
&& (first[0] == _UT('.'))
&& (first[1] == _UT('.')))
||
URI_FUNC(ContainsUglyPercentEncoding)(first, afterLast)
)) {
*outMask |= URI_NORMALIZE_PATH;
break;
}
walker = walker->next;
}
} else if (inMask & URI_NORMALIZE_PATH) {
URI_TYPE(PathSegment) * walker;
/* Relative means: no scheme and no absolute path */
const UriBool relative = ((uri->scheme.first == NULL)
&& !uri->absolutePath) ? URI_TRUE : URI_FALSE;
/* Fix percent-encoding for each segment */
walker = uri->pathHead;
if (uri->owner) {
while (walker != NULL) {
URI_FUNC(FixPercentEncodingInplace)(walker->text.first, &(walker->text.afterLast));
walker = walker->next;
}
} else {
/* NOTE(review): if this fails on a later segment, doneMask does not
 * yet contain URI_NORMALIZE_PATH, so PreventLeakage will not release
 * the buffers already allocated for earlier segments - looks like a
 * leak on this error path; confirm before changing. */
while (walker != NULL) {
if (!URI_FUNC(FixPercentEncodingMalloc)(&(walker->text.first),
&(walker->text.afterLast))) {
URI_FUNC(PreventLeakage)(uri, doneMask);
return URI_ERROR_MALLOC;
}
walker = walker->next;
}
doneMask |= URI_NORMALIZE_PATH;
}
/* 6.2.2.3 Path Segment Normalization */
/* Third argument is true when the URI is owner or the segment texts
 * were replaced by fresh allocations above */
if (!URI_FUNC(RemoveDotSegmentsEx)(uri, relative,
(uri->owner == URI_TRUE)
|| ((doneMask & URI_NORMALIZE_PATH) != 0)
)) {
URI_FUNC(PreventLeakage)(uri, doneMask);
return URI_ERROR_MALLOC;
}
URI_FUNC(FixEmptyTrailSegment)(uri);
}
/* Query, fragment */
if (outMask != NULL) {
const UriBool normalizeQuery = URI_FUNC(ContainsUglyPercentEncoding)(
uri->query.first, uri->query.afterLast);
const UriBool normalizeFragment = URI_FUNC(ContainsUglyPercentEncoding)(
uri->fragment.first, uri->fragment.afterLast);
if (normalizeQuery) {
*outMask |= URI_NORMALIZE_QUERY;
}
if (normalizeFragment) {
*outMask |= URI_NORMALIZE_FRAGMENT;
}
} else {
/* Query */
if ((inMask & URI_NORMALIZE_QUERY) && (uri->query.first != NULL)) {
if (uri->owner) {
URI_FUNC(FixPercentEncodingInplace)(uri->query.first, &(uri->query.afterLast));
} else {
if (!URI_FUNC(FixPercentEncodingMalloc)(&(uri->query.first),
&(uri->query.afterLast))) {
URI_FUNC(PreventLeakage)(uri, doneMask);
return URI_ERROR_MALLOC;
}
doneMask |= URI_NORMALIZE_QUERY;
}
}
/* Fragment */
if ((inMask & URI_NORMALIZE_FRAGMENT) && (uri->fragment.first != NULL)) {
if (uri->owner) {
URI_FUNC(FixPercentEncodingInplace)(uri->fragment.first, &(uri->fragment.afterLast));
} else {
if (!URI_FUNC(FixPercentEncodingMalloc)(&(uri->fragment.first),
&(uri->fragment.afterLast))) {
URI_FUNC(PreventLeakage)(uri, doneMask);
return URI_ERROR_MALLOC;
}
doneMask |= URI_NORMALIZE_FRAGMENT;
}
}
}
/* Dup all not duped yet */
/* Only in normalization mode: components not reallocated above are
 * duplicated so the URI can be flagged as owner of all its text */
if ((outMask == NULL) && !uri->owner) {
if (!URI_FUNC(MakeOwner)(uri, &doneMask)) {
URI_FUNC(PreventLeakage)(uri, doneMask);
return URI_ERROR_MALLOC;
}
uri->owner = URI_TRUE;
}
return URI_SUCCESS;
}
#endif

View File

@ -0,0 +1,119 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
* Copyright (C) 2007, Sebastian Pipping <webmaster@hartwork.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the <ORGANIZATION> nor the names of its
* contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef URI_DOXYGEN
# include "UriNormalizeBase.h"
#endif
/**
 * Tells whether a character code belongs to the set the original switch
 * accepted, labelled there ALPHA, DIGIT and "-" / "." / "_" / "~".
 *
 * The letter ranges are tested as intervals, which relies on 'a'..'z' and
 * 'A'..'Z' being contiguous in the wide execution character set (true for
 * ASCII-compatible encodings).
 *
 * @param code  Character code to classify
 * @return      URI_TRUE for an unreserved character, URI_FALSE otherwise
 */
UriBool uriIsUnreserved(int code) {
	/* ALPHA */
	if (((code >= (int)L'a') && (code <= (int)L'z'))
			|| ((code >= (int)L'A') && (code <= (int)L'Z'))) {
		return URI_TRUE;
	}

	/* DIGIT */
	if ((code >= (int)L'0') && (code <= (int)L'9')) {
		return URI_TRUE;
	}

	/* "-" / "." / "_" / "~" */
	if ((code == (int)L'-') || (code == (int)L'.')
			|| (code == (int)L'_') || (code == (int)L'~')) {
		return URI_TRUE;
	}

	return URI_FALSE;
}

View File

@ -0,0 +1,53 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
* Copyright (C) 2007, Sebastian Pipping <webmaster@hartwork.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the <ORGANIZATION> nor the names of its
* contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef URI_NORMALIZE_BASE_H
#define URI_NORMALIZE_BASE_H 1
#include "UriBase.h"
/* Returns URI_TRUE when code is a letter, a digit or one of
 * "-", ".", "_", "~"; URI_FALSE otherwise. */
UriBool uriIsUnreserved(int code);
#endif /* URI_NORMALIZE_BASE_H */

View File

@ -0,0 +1,90 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
* Copyright (C) 2007, Sebastian Pipping <webmaster@hartwork.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the <ORGANIZATION> nor the names of its
* contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef URI_DOXYGEN
# include "UriParseBase.h"
#endif
/**
 * Packs one to four hex digit values (each 0..15, most significant
 * first) into two output bytes, high byte first.
 *
 * For any other digit count the output is left untouched.
 *
 * @param hexDigits   Digit values, already converted from characters
 * @param digitCount  Number of digits available (1 to 4)
 * @param output      Receives two bytes: output[0] high, output[1] low
 */
void uriWriteQuadToDoubleByte(const unsigned char * hexDigits, int digitCount, unsigned char * output) {
	if (digitCount == 1) {
		/* 0x___? -> \x00 \x0? */
		output[0] = 0;
		output[1] = hexDigits[0];
	} else if (digitCount == 2) {
		/* 0x__?? -> \x00 \x?? */
		output[0] = 0;
		output[1] = 16 * hexDigits[0] + hexDigits[1];
	} else if (digitCount == 3) {
		/* 0x_??? -> \x0? \x?? */
		output[0] = hexDigits[0];
		output[1] = 16 * hexDigits[1] + hexDigits[2];
	} else if (digitCount == 4) {
		/* 0x???? -> \x?? \x?? */
		output[0] = 16 * hexDigits[0] + hexDigits[1];
		output[1] = 16 * hexDigits[2] + hexDigits[3];
	}
}
/**
 * Combines decimal digit values (most significant first) into one value.
 *
 * One and two digits are handled explicitly; every other count is
 * treated like three digits, so digits[0..2] must be readable then.
 * The result is truncated to unsigned char.
 *
 * @param digits      Digit values, already converted from characters
 * @param digitCount  Number of digits to combine
 * @return            The combined value
 */
unsigned char uriGetOctetValue(const unsigned char * digits, int digitCount) {
	if (digitCount == 1) {
		return digits[0];
	}
	if (digitCount == 2) {
		return (unsigned char)(10 * digits[0] + digits[1]);
	}
	/* Three digits, and the fallback for any other count */
	return (unsigned char)(100 * digits[0] + 10 * digits[1] + digits[2]);
}

View File

@ -0,0 +1,55 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
* Copyright (C) 2007, Sebastian Pipping <webmaster@hartwork.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the <ORGANIZATION> nor the names of its
* contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef URI_PARSE_BASE_H
#define URI_PARSE_BASE_H 1
#include "UriBase.h"
/* Packs 1 to 4 hex digit values (most significant first) into two bytes,
 * output[0] being the high byte; other digit counts leave output as is. */
void uriWriteQuadToDoubleByte(const unsigned char * hexDigits, int digitCount,
unsigned char * output);
/* Combines decimal digit values (most significant first) into one value;
 * counts other than 1 or 2 are treated as three digits. */
unsigned char uriGetOctetValue(const unsigned char * digits, int digitCount);
#endif /* URI_PARSE_BASE_H */

View File

@ -0,0 +1,456 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
* Copyright (C) 2007, Sebastian Pipping <webmaster@hartwork.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the <ORGANIZATION> nor the names of its
* contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* What encodings are enabled? */
#include "UriDefsConfig.h"
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
# define URI_PASS_ANSI 1
# include "UriQuery.c"
# undef URI_PASS_ANSI
# define URI_PASS_UNICODE 1
# include "UriQuery.c"
# undef URI_PASS_UNICODE
#else
# ifdef URI_PASS_ANSI
# include "UriDefsAnsi.h"
# else
# include "UriDefsUnicode.h"
# include <wchar.h>
# endif
#ifndef URI_DOXYGEN
# include "Uri.h"
# include "UriCommon.h"
#endif
/* Forward declarations of the file-local helpers defined further below. */

/* Shared worker of the ComposeQuery* functions: counts characters when
 * dest is NULL, writes the query string into dest otherwise. */
static int URI_FUNC(ComposeQueryEngine)(URI_CHAR * dest,
const URI_TYPE(QueryList) * queryList,
int maxChars, int * charsWritten, int * charsRequired,
UriBool spaceToPlus, UriBool normalizeBreaks);
/* Allocates one query list item for the given key/value ranges and hooks
 * it into *prevNext; returns URI_FALSE only when an allocation fails. */
static UriBool URI_FUNC(AppendQueryItem)(URI_TYPE(QueryList) ** prevNext,
int * itemCount, const URI_CHAR * keyFirst, const URI_CHAR * keyAfter,
const URI_CHAR * valueFirst, const URI_CHAR * valueAfter,
UriBool plusToSpace, UriBreakConversion breakConversion);
/* Convenience form of ComposeQueryCharsRequiredEx with both
 * space-to-plus conversion and line break normalization switched on. */
int URI_FUNC(ComposeQueryCharsRequired)(const URI_TYPE(QueryList) * queryList,
		int * charsRequired) {
	return URI_FUNC(ComposeQueryCharsRequiredEx)(queryList, charsRequired,
			URI_TRUE, URI_TRUE);
}
/* Computes into *charsRequired the (worst case) number of characters the
 * composed query string needs, by running the engine in counting mode
 * (no destination buffer).
 * Returns URI_ERROR_NULL if queryList or charsRequired is NULL, otherwise
 * whatever the engine returns. */
int URI_FUNC(ComposeQueryCharsRequiredEx)(const URI_TYPE(QueryList) * queryList,
		int * charsRequired, UriBool spaceToPlus, UriBool normalizeBreaks) {
	if ((queryList != NULL) && (charsRequired != NULL)) {
		return URI_FUNC(ComposeQueryEngine)(NULL, queryList, 0, NULL,
				charsRequired, spaceToPlus, normalizeBreaks);
	}
	return URI_ERROR_NULL;
}
/* Convenience form of ComposeQueryEx with both space-to-plus conversion
 * and line break normalization switched on. */
int URI_FUNC(ComposeQuery)(URI_CHAR * dest,
		const URI_TYPE(QueryList) * queryList, int maxChars, int * charsWritten) {
	return URI_FUNC(ComposeQueryEx)(dest, queryList, maxChars, charsWritten,
			URI_TRUE, URI_TRUE);
}
/* Writes the query string for queryList into dest (capacity maxChars,
 * terminator included) by running the engine in writing mode.
 * Returns URI_ERROR_NULL if dest or queryList is NULL,
 * URI_ERROR_OUTPUT_TOO_LARGE if maxChars is below 1, otherwise whatever
 * the engine returns. charsWritten is passed through and may be NULL. */
int URI_FUNC(ComposeQueryEx)(URI_CHAR * dest,
		const URI_TYPE(QueryList) * queryList, int maxChars, int * charsWritten,
		UriBool spaceToPlus, UriBool normalizeBreaks) {
	if ((dest == NULL) || (queryList == NULL)) {
		return URI_ERROR_NULL;
	}

	/* Not even room for the terminator */
	return (maxChars < 1)
			? URI_ERROR_OUTPUT_TOO_LARGE
			: URI_FUNC(ComposeQueryEngine)(dest, queryList, maxChars,
					charsWritten, NULL, spaceToPlus, normalizeBreaks);
}
/* Convenience form of ComposeQueryMallocEx with both space-to-plus
 * conversion and line break normalization switched on. */
int URI_FUNC(ComposeQueryMalloc)(URI_CHAR ** dest,
		const URI_TYPE(QueryList) * queryList) {
	return URI_FUNC(ComposeQueryMallocEx)(dest, queryList,
			URI_TRUE, URI_TRUE);
}
/* Composes the query string for queryList into a freshly malloc'ed
 * buffer and stores that buffer in *dest on success.
 * Returns URI_ERROR_NULL if dest is NULL, URI_ERROR_MALLOC if the
 * allocation fails, or the error of the sizing/composing step; *dest is
 * only written on URI_SUCCESS. */
int URI_FUNC(ComposeQueryMallocEx)(URI_CHAR ** dest,
		const URI_TYPE(QueryList) * queryList,
		UriBool spaceToPlus, UriBool normalizeBreaks) {
	int needed;
	int status;
	URI_CHAR * buffer;

	if (dest == NULL) {
		return URI_ERROR_NULL;
	}

	/* Step 1: how much room do we need? */
	status = URI_FUNC(ComposeQueryCharsRequiredEx)(queryList, &needed,
			spaceToPlus, normalizeBreaks);
	if (status != URI_SUCCESS) {
		return status;
	}
	needed += 1; /* terminator */

	/* Step 2: get that room */
	buffer = malloc(needed * sizeof(URI_CHAR));
	if (buffer == NULL) {
		return URI_ERROR_MALLOC;
	}

	/* Step 3: compose, handing the buffer over only on success */
	status = URI_FUNC(ComposeQueryEx)(buffer, queryList, needed,
			NULL, spaceToPlus, normalizeBreaks);
	if (status == URI_SUCCESS) {
		*dest = buffer;
	} else {
		free(buffer);
	}
	return status;
}
/*
 * Shared worker of the ComposeQuery* functions.
 *
 * Counting mode (dest == NULL): *charsRequired receives an upper bound
 * for the number of characters needed (terminator excluded); nothing is
 * written. charsRequired must not be NULL in this mode.
 *
 * Writing mode (dest != NULL): the items of queryList are written to dest
 * as "key=value" pairs joined by '&' ("key" alone for items whose value is
 * NULL), followed by a terminator. maxChars is the capacity of dest
 * including the terminator. If charsWritten is not NULL it receives the
 * number of characters written, terminator included.
 *
 * Space is checked against the worst case expansion of each key/value
 * before it is escaped, so URI_ERROR_OUTPUT_TOO_LARGE may be returned for
 * a buffer the actual output would have fit into.
 *
 * Returns URI_SUCCESS, or URI_ERROR_OUTPUT_TOO_LARGE when the buffer is
 * too small or a key/value is so long that its worst case size does not
 * fit into an int.
 */
int URI_FUNC(ComposeQueryEngine)(URI_CHAR * dest,
		const URI_TYPE(QueryList) * queryList,
		int maxChars, int * charsWritten, int * charsRequired,
		UriBool spaceToPlus, UriBool normalizeBreaks) {
	/* Largest value an int can hold, computed without <limits.h> */
	const int intMax = (int)(((unsigned int)-1) >> 1);
	UriBool firstItem = URI_TRUE;
	int ampersandLen = 0;
	URI_CHAR * write = dest;

	/* Subtract terminator */
	if (dest == NULL) {
		*charsRequired = 0;
	} else {
		maxChars--;
	}

	while (queryList != NULL) {
		const URI_CHAR * const key = queryList->key;
		const URI_CHAR * const value = queryList->value;
		/* Output characters budgeted per input character */
		const int worstCase = (normalizeBreaks == URI_TRUE ? 6 : 3);
		const int keyLen = (key == NULL) ? 0 : (int)URI_STRLEN(key);
		const int valueLen = (value == NULL) ? 0 : (int)URI_STRLEN(value);
		int keyRequiredChars;
		int valueRequiredChars;

		/* Refuse inputs whose worst case size would overflow the int
		 * multiplications below and thereby defeat the bounds checks */
		if ((keyLen >= intMax / worstCase)
				|| (valueLen >= intMax / worstCase)) {
			return URI_ERROR_OUTPUT_TOO_LARGE;
		}
		keyRequiredChars = worstCase * keyLen;
		valueRequiredChars = worstCase * valueLen;

		if (dest == NULL) {
			if (firstItem == URI_TRUE) {
				ampersandLen = 1;
				firstItem = URI_FALSE;
			}

			(*charsRequired) += ampersandLen + keyRequiredChars + ((value == NULL)
						? 0
						: 1 + valueRequiredChars);
		} else {
			URI_CHAR * afterKey;

			/* ampersandLen is 0 for the first item and 1 afterwards, so
			 * the '&' written below is covered by this check */
			if ((write - dest) + ampersandLen + keyRequiredChars > maxChars) {
				return URI_ERROR_OUTPUT_TOO_LARGE;
			}

			/* Copy key */
			if (firstItem == URI_TRUE) {
				/* Every following item is preceded by a separator */
				ampersandLen = 1;
				firstItem = URI_FALSE;
			} else {
				write[0] = _UT('&');
				write++;
			}
			afterKey = URI_FUNC(EscapeEx)(key, key + keyLen,
					write, spaceToPlus, normalizeBreaks);
			write += (afterKey - write);

			if (value != NULL) {
				URI_CHAR * afterValue;

				if ((write - dest) + 1 + valueRequiredChars > maxChars) {
					return URI_ERROR_OUTPUT_TOO_LARGE;
				}

				/* Copy value */
				write[0] = _UT('=');
				write++;
				afterValue = URI_FUNC(EscapeEx)(value, value + valueLen,
						write, spaceToPlus, normalizeBreaks);
				write += (afterValue - write);
			}
		}

		queryList = queryList->next;
	}

	if (dest != NULL) {
		write[0] = _UT('\0');
		if (charsWritten != NULL) {
			*charsWritten = (int)(write - dest) + 1; /* .. for terminator */
		}
	}

	return URI_SUCCESS;
}
/*
 * Creates one query list item from the key range [keyFirst, keyAfter) and
 * the optional value range [valueFirst, valueAfter), stores it in
 * *prevNext and increments *itemCount. Key and value are copied into
 * their own malloc'ed, terminated buffers and unescaped in place; a NULL
 * valueFirst yields an item whose value is NULL.
 *
 * Returns URI_FALSE only when an allocation fails (then *prevNext is NULL
 * and nothing stays allocated). Missing or inverted ranges and the
 * "empty key without value" case are skipped silently: URI_TRUE is
 * returned and nothing is appended.
 */
UriBool URI_FUNC(AppendQueryItem)(URI_TYPE(QueryList) ** prevNext,
		int * itemCount, const URI_CHAR * keyFirst, const URI_CHAR * keyAfter,
		const URI_CHAR * valueFirst, const URI_CHAR * valueAfter,
		UriBool plusToSpace, UriBreakConversion breakConversion) {
	URI_TYPE(QueryList) * item;
	URI_CHAR * keyCopy;
	URI_CHAR * valueCopy = NULL;
	int keyLen;

	/* Unusable arguments: nothing to do, not an error */
	if ((prevNext == NULL) || (itemCount == NULL)
			|| (keyFirst == NULL) || (keyAfter == NULL)
			|| (keyFirst > keyAfter) || (valueFirst > valueAfter)) {
		return URI_TRUE;
	}

	/* Empty key and no value at all: nothing worth an item */
	if ((keyFirst == keyAfter)
			&& (valueFirst == NULL) && (valueAfter == NULL)) {
		return URI_TRUE;
	}

	keyLen = (int)(keyAfter - keyFirst);

	/* The item itself */
	item = malloc(1 * sizeof(URI_TYPE(QueryList)));
	*prevNext = item;
	if (item == NULL) {
		return URI_FALSE; /* Raises malloc error */
	}
	item->next = NULL;

	/* The key: verbatim copy first, then unescaped in place */
	keyCopy = malloc((keyLen + 1) * sizeof(URI_CHAR));
	if (keyCopy == NULL) {
		free(item);
		*prevNext = NULL;
		return URI_FALSE; /* Raises malloc error */
	}
	keyCopy[keyLen] = _UT('\0');
	if (keyLen > 0) {
		memcpy(keyCopy, keyFirst, keyLen * sizeof(URI_CHAR));
		URI_FUNC(UnescapeInPlaceEx)(keyCopy, plusToSpace, breakConversion);
	}
	item->key = keyCopy;

	/* The value, if there is one: same procedure */
	if (valueFirst != NULL) {
		const int valueLen = (int)(valueAfter - valueFirst);

		valueCopy = malloc((valueLen + 1) * sizeof(URI_CHAR));
		if (valueCopy == NULL) {
			free(keyCopy);
			free(item);
			*prevNext = NULL;
			return URI_FALSE; /* Raises malloc error */
		}
		valueCopy[valueLen] = _UT('\0');
		if (valueLen > 0) {
			memcpy(valueCopy, valueFirst, valueLen * sizeof(URI_CHAR));
			URI_FUNC(UnescapeInPlaceEx)(valueCopy, plusToSpace, breakConversion);
		}
	}
	item->value = valueCopy;

	(*itemCount)++;
	return URI_TRUE;
}
/* Releases every item of the list starting at queryList together with
 * the key and value buffers it holds. A NULL list is a no-op. */
void URI_FUNC(FreeQueryList)(URI_TYPE(QueryList) * queryList) {
	URI_TYPE(QueryList) * item = queryList;
	URI_TYPE(QueryList) * following;

	for (; item != NULL; item = following) {
		/* Remember the successor before the item goes away */
		following = item->next;
		free(item->key);
		free(item->value);
		free(item);
	}
}
/* Convenience form of DissectQueryMallocEx: '+' is turned into a space
 * and line breaks are left untouched. */
int URI_FUNC(DissectQueryMalloc)(URI_TYPE(QueryList) ** dest, int * itemCount,
		const URI_CHAR * first, const URI_CHAR * afterLast) {
	return URI_FUNC(DissectQueryMallocEx)(dest, itemCount, first, afterLast,
			URI_TRUE, URI_BR_DONT_TOUCH);
}
/*
 * Splits the query string [first, afterLast) at '&' into items and each
 * item at its first '=' into key and value, building a malloc'ed list
 * that is stored in *dest (free it with FreeQueryList).
 *
 * An item without '=' gets a NULL value; an item ending right after its
 * '=' (e.g. the "y=" in "x=1&y=") gets an empty string as value. Empty
 * items (as produced by "&&" or a trailing '&') are skipped.
 *
 * itemCount may be NULL; otherwise it receives the number of items.
 *
 * Returns URI_SUCCESS, URI_ERROR_NULL if dest, first or afterLast is
 * NULL, URI_ERROR_RANGE_INVALID if first lies behind afterLast, or
 * URI_ERROR_MALLOC when out of memory. In the out-of-memory case the
 * partial list is freed and *dest is reset to NULL so the caller is not
 * left holding a dangling pointer.
 */
int URI_FUNC(DissectQueryMallocEx)(URI_TYPE(QueryList) ** dest, int * itemCount,
		const URI_CHAR * first, const URI_CHAR * afterLast,
		UriBool plusToSpace, UriBreakConversion breakConversion) {
	const URI_CHAR * walk = first;
	const URI_CHAR * keyFirst = first;
	const URI_CHAR * keyAfter = NULL;
	const URI_CHAR * valueFirst = NULL;
	const URI_CHAR * valueAfter = NULL;
	URI_TYPE(QueryList) ** prevNext = dest;
	int nullCounter;
	/* Lets the code below count unconditionally */
	int * itemsAppended = (itemCount == NULL) ? &nullCounter : itemCount;

	if ((dest == NULL) || (first == NULL) || (afterLast == NULL)) {
		return URI_ERROR_NULL;
	}

	if (first > afterLast) {
		return URI_ERROR_RANGE_INVALID;
	}

	*dest = NULL;
	*itemsAppended = 0;

	/* Parse query string */
	for (; walk < afterLast; walk++) {
		switch (*walk) {
		case _UT('&'):
			/* End of the current item */
			if (valueFirst != NULL) {
				valueAfter = walk;
			} else {
				keyAfter = walk;
			}

			if (URI_FUNC(AppendQueryItem)(prevNext, itemsAppended,
					keyFirst, keyAfter, valueFirst, valueAfter,
					plusToSpace, breakConversion)
					== URI_FALSE) {
				/* Free list we built */
				*itemsAppended = 0;
				URI_FUNC(FreeQueryList)(*dest);
				*dest = NULL;
				return URI_ERROR_MALLOC;
			}

			/* Make future items children of the current */
			if ((prevNext != NULL) && (*prevNext != NULL)) {
				prevNext = &((*prevNext)->next);
			}

			/* A trailing '&' starts no further item */
			if (walk + 1 < afterLast) {
				keyFirst = walk + 1;
			} else {
				keyFirst = NULL;
			}
			keyAfter = NULL;
			valueFirst = NULL;
			valueAfter = NULL;
			break;

		case _UT('='):
			/* NOTE: WE treat the first '=' as a separator, */
			/*       all following go into the value part   */
			if (keyAfter == NULL) {
				keyAfter = walk;
				/* The value starts right behind the '=' even when that
				 * is the end of the input: "key=" means an empty value,
				 * just like it does in "key=&next" */
				valueFirst = walk + 1;
				valueAfter = walk + 1;
			}
			break;

		default:
			break;
		}
	}

	/* The input ended inside the last item; close it */
	if (valueFirst != NULL) {
		/* Must be key/value pair */
		valueAfter = walk;
	} else {
		/* Must be key only */
		keyAfter = walk;
	}

	if (URI_FUNC(AppendQueryItem)(prevNext, itemsAppended, keyFirst, keyAfter,
			valueFirst, valueAfter, plusToSpace, breakConversion)
			== URI_FALSE) {
		/* Free list we built */
		*itemsAppended = 0;
		URI_FUNC(FreeQueryList)(*dest);
		*dest = NULL;
		return URI_ERROR_MALLOC;
	}

	return URI_SUCCESS;
}
#endif

View File

@ -0,0 +1,573 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
* Copyright (C) 2007, Sebastian Pipping <webmaster@hartwork.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the <ORGANIZATION> nor the names of its
* contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* What encodings are enabled? */
#include "UriDefsConfig.h"
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
# define URI_PASS_ANSI 1
# include "UriRecompose.c"
# undef URI_PASS_ANSI
# define URI_PASS_UNICODE 1
# include "UriRecompose.c"
# undef URI_PASS_UNICODE
#else
# ifdef URI_PASS_ANSI
# include "UriDefsAnsi.h"
# else
# include "UriDefsUnicode.h"
# include <wchar.h>
# endif
#ifndef URI_DOXYGEN
# include "Uri.h"
# include "UriCommon.h"
#endif
/* Forward declaration of the shared worker defined below: it writes the
 * URI into dest, or only counts characters when dest is NULL. */
static int URI_FUNC(ToStringEngine)(URI_CHAR * dest, const URI_TYPE(Uri) * uri,
int maxChars, int * charsWritten, int * charsRequired);
/* Computes into *charsRequired the number of characters the string form
 * of uri takes (terminator excluded) by running the engine without a
 * destination buffer and with the largest possible size limit. */
int URI_FUNC(ToStringCharsRequired)(const URI_TYPE(Uri) * uri,
		int * charsRequired) {
	/* All bits set, shifted right once: the largest positive int */
	return URI_FUNC(ToStringEngine)(NULL, uri,
			(int)(((unsigned int)-1) >> 1), NULL, charsRequired);
}
/* Writes the string form of uri into dest (capacity maxChars, terminator
 * included); charsWritten is handed through to the engine. */
int URI_FUNC(ToString)(URI_CHAR * dest, const URI_TYPE(Uri) * uri,
		int maxChars, int * charsWritten) {
	/* Writing mode: no character count is requested from the engine */
	int * const noCharsRequired = NULL;

	return URI_FUNC(ToStringEngine)(dest, uri, maxChars, charsWritten,
			noCharsRequired);
}
static URI_INLINE int URI_FUNC(ToStringEngine)(URI_CHAR * dest,
const URI_TYPE(Uri) * uri, int maxChars, int * charsWritten,
int * charsRequired) {
int written = 0;
if ((uri == NULL) || ((dest == NULL) && (charsRequired == NULL))) {
if (charsWritten != NULL) {
*charsWritten = 0;
}
return URI_ERROR_NULL;
}
if (maxChars < 1) {
if (charsWritten != NULL) {
*charsWritten = 0;
}
return URI_ERROR_TOSTRING_TOO_LONG;
}
maxChars--; /* So we don't have to substract 1 for '\0' all the time */
/* [01/19] result = "" */
if (dest != NULL) {
dest[0] = _UT('\0');
} else {
(*charsRequired) = 0;
}
/* [02/19] if defined(scheme) then */
if (uri->scheme.first != NULL) {
/* [03/19] append scheme to result; */
const int charsToWrite
= (int)(uri->scheme.afterLast - uri->scheme.first);
if (dest != NULL) {
if (written + charsToWrite <= maxChars) {
memcpy(dest + written, uri->scheme.first,
charsToWrite * sizeof(URI_CHAR));
written += charsToWrite;
} else {
dest[0] = _UT('\0');
if (charsWritten != NULL) {
*charsWritten = 0;
}
return URI_ERROR_TOSTRING_TOO_LONG;
}
} else {
(*charsRequired) += charsToWrite;
}
/* [04/19] append ":" to result; */
if (dest != NULL) {
if (written + 1 <= maxChars) {
memcpy(dest + written, _UT(":"),
1 * sizeof(URI_CHAR));
written += 1;
} else {
dest[0] = _UT('\0');
if (charsWritten != NULL) {
*charsWritten = 0;
}
return URI_ERROR_TOSTRING_TOO_LONG;
}
} else {
(*charsRequired) += 1;
}
/* [05/19] endif; */
}
/* [06/19] if defined(authority) then */
if (URI_FUNC(IsHostSet)(uri)) {
/* [07/19] append "//" to result; */
if (dest != NULL) {
if (written + 2 <= maxChars) {
memcpy(dest + written, _UT("//"),
2 * sizeof(URI_CHAR));
written += 2;
} else {
dest[0] = _UT('\0');
if (charsWritten != NULL) {
*charsWritten = 0;
}
return URI_ERROR_TOSTRING_TOO_LONG;
}
} else {
(*charsRequired) += 2;
}
/* [08/19] append authority to result; */
/* UserInfo */
if (uri->userInfo.first != NULL) {
const int charsToWrite = (int)(uri->userInfo.afterLast - uri->userInfo.first);
if (dest != NULL) {
if (written + charsToWrite <= maxChars) {
memcpy(dest + written, uri->userInfo.first,
charsToWrite * sizeof(URI_CHAR));
written += charsToWrite;
} else {
dest[0] = _UT('\0');
if (charsWritten != NULL) {
*charsWritten = 0;
}
return URI_ERROR_TOSTRING_TOO_LONG;
}
if (written + 1 <= maxChars) {
memcpy(dest + written, _UT("@"),
1 * sizeof(URI_CHAR));
written += 1;
} else {
dest[0] = _UT('\0');
if (charsWritten != NULL) {
*charsWritten = 0;
}
return URI_ERROR_TOSTRING_TOO_LONG;
}
} else {
(*charsRequired) += charsToWrite + 1;
}
}
/* Host */
if (uri->hostData.ip4 != NULL) {
/* IPv4 */
int i = 0;
for (; i < 4; i++) {
const unsigned char value = uri->hostData.ip4->data[i];
const int charsToWrite = (value > 99) ? 3 : ((value > 9) ? 2 : 1);
if (dest != NULL) {
if (written + charsToWrite <= maxChars) {
URI_CHAR text[4];
if (value > 99) {
text[0] = _UT('0') + (value / 100);
text[1] = _UT('0') + ((value % 100) / 10);
text[2] = _UT('0') + (value % 10);
} else if (value > 9) {
text[0] = _UT('0') + (value / 10);
text[1] = _UT('0') + (value % 10);
} else {
text[0] = _UT('0') + value;
}
text[charsToWrite] = _UT('\0');
memcpy(dest + written, text, charsToWrite * sizeof(URI_CHAR));
written += charsToWrite;
} else {
dest[0] = _UT('\0');
if (charsWritten != NULL) {
*charsWritten = 0;
}
return URI_ERROR_TOSTRING_TOO_LONG;
}
if (i < 3) {
if (written + 1 <= maxChars) {
memcpy(dest + written, _UT("."),
1 * sizeof(URI_CHAR));
written += 1;
} else {
dest[0] = _UT('\0');
if (charsWritten != NULL) {
*charsWritten = 0;
}
return URI_ERROR_TOSTRING_TOO_LONG;
}
}
} else {
(*charsRequired) += charsToWrite + 1;
}
}
} else if (uri->hostData.ip6 != NULL) {
/* IPv6 */
int i = 0;
if (dest != NULL) {
if (written + 1 <= maxChars) {
memcpy(dest + written, _UT("["),
1 * sizeof(URI_CHAR));
written += 1;
} else {
dest[0] = _UT('\0');
if (charsWritten != NULL) {
*charsWritten = 0;
}
return URI_ERROR_TOSTRING_TOO_LONG;
}
} else {
(*charsRequired) += 1;
}
for (; i < 16; i++) {
const unsigned char value = uri->hostData.ip6->data[i];
if (dest != NULL) {
if (written + 2 <= maxChars) {
URI_CHAR text[3];
text[0] = URI_FUNC(HexToLetterEx)(value / 16, URI_FALSE);
text[1] = URI_FUNC(HexToLetterEx)(value % 16, URI_FALSE);
text[2] = _UT('\0');
memcpy(dest + written, text, 2 * sizeof(URI_CHAR));
written += 2;
} else {
dest[0] = _UT('\0');
if (charsWritten != NULL) {
*charsWritten = 0;
}
return URI_ERROR_TOSTRING_TOO_LONG;
}
} else {
(*charsRequired) += 2;
}
if (((i & 1) == 1) && (i < 15)) {
if (dest != NULL) {
if (written + 1 <= maxChars) {
memcpy(dest + written, _UT(":"),
1 * sizeof(URI_CHAR));
written += 1;
} else {
dest[0] = _UT('\0');
if (charsWritten != NULL) {
*charsWritten = 0;
}
return URI_ERROR_TOSTRING_TOO_LONG;
}
} else {
(*charsRequired) += 1;
}
}
}
if (dest != NULL) {
if (written + 1 <= maxChars) {
memcpy(dest + written, _UT("]"),
1 * sizeof(URI_CHAR));
written += 1;
} else {
dest[0] = _UT('\0');
if (charsWritten != NULL) {
*charsWritten = 0;
}
return URI_ERROR_TOSTRING_TOO_LONG;
}
} else {
(*charsRequired) += 1;
}
} else if (uri->hostData.ipFuture.first != NULL) {
/* IPvFuture */
const int charsToWrite = (int)(uri->hostData.ipFuture.afterLast
- uri->hostData.ipFuture.first);
if (dest != NULL) {
if (written + 1 <= maxChars) {
memcpy(dest + written, _UT("["),
1 * sizeof(URI_CHAR));
written += 1;
} else {
dest[0] = _UT('\0');
if (charsWritten != NULL) {
*charsWritten = 0;
}
return URI_ERROR_TOSTRING_TOO_LONG;
}
if (written + charsToWrite <= maxChars) {
memcpy(dest + written, uri->hostData.ipFuture.first, charsToWrite * sizeof(URI_CHAR));
written += charsToWrite;
} else {
dest[0] = _UT('\0');
if (charsWritten != NULL) {
*charsWritten = 0;
}
return URI_ERROR_TOSTRING_TOO_LONG;
}
if (written + 1 <= maxChars) {
memcpy(dest + written, _UT("]"),
1 * sizeof(URI_CHAR));
written += 1;
} else {
dest[0] = _UT('\0');
if (charsWritten != NULL) {
*charsWritten = 0;
}
return URI_ERROR_TOSTRING_TOO_LONG;
}
} else {
(*charsRequired) += 1 + charsToWrite + 1;
}
} else if (uri->hostText.first != NULL) {
/* Regname */
const int charsToWrite = (int)(uri->hostText.afterLast - uri->hostText.first);
if (dest != NULL) {
if (written + charsToWrite <= maxChars) {
memcpy(dest + written, uri->hostText.first,
charsToWrite * sizeof(URI_CHAR));
written += charsToWrite;
} else {
dest[0] = _UT('\0');
if (charsWritten != NULL) {
*charsWritten = 0;
}
return URI_ERROR_TOSTRING_TOO_LONG;
}
} else {
(*charsRequired) += charsToWrite;
}
}
/* Port */
if (uri->portText.first != NULL) {
const int charsToWrite = (int)(uri->portText.afterLast - uri->portText.first);
if (dest != NULL) {
/* Leading ':' */
if (written + 1 <= maxChars) {
memcpy(dest + written, _UT(":"),
1 * sizeof(URI_CHAR));
written += 1;
} else {
dest[0] = _UT('\0');
if (charsWritten != NULL) {
*charsWritten = 0;
}
return URI_ERROR_TOSTRING_TOO_LONG;
}
/* Port number */
if (written + charsToWrite <= maxChars) {
memcpy(dest + written, uri->portText.first,
charsToWrite * sizeof(URI_CHAR));
written += charsToWrite;
} else {
dest[0] = _UT('\0');
if (charsWritten != NULL) {
*charsWritten = 0;
}
return URI_ERROR_TOSTRING_TOO_LONG;
}
} else {
(*charsRequired) += 1 + charsToWrite;
}
}
/* [09/19] endif; */
}
/* [10/19] append path to result; */
/* Slash needed here? */
if (uri->absolutePath || ((uri->pathHead != NULL)
&& URI_FUNC(IsHostSet)(uri))) {
if (dest != NULL) {
if (written + 1 <= maxChars) {
memcpy(dest + written, _UT("/"),
1 * sizeof(URI_CHAR));
written += 1;
} else {
dest[0] = _UT('\0');
if (charsWritten != NULL) {
*charsWritten = 0;
}
return URI_ERROR_TOSTRING_TOO_LONG;
}
} else {
(*charsRequired) += 1;
}
}
if (uri->pathHead != NULL) {
URI_TYPE(PathSegment) * walker = uri->pathHead;
do {
const int charsToWrite = (int)(walker->text.afterLast - walker->text.first);
if (dest != NULL) {
if (written + charsToWrite <= maxChars) {
memcpy(dest + written, walker->text.first,
charsToWrite * sizeof(URI_CHAR));
written += charsToWrite;
} else {
dest[0] = _UT('\0');
if (charsWritten != NULL) {
*charsWritten = 0;
}
return URI_ERROR_TOSTRING_TOO_LONG;
}
} else {
(*charsRequired) += charsToWrite;
}
/* Not last segment -> append slash */
if (walker->next != NULL) {
if (dest != NULL) {
if (written + 1 <= maxChars) {
memcpy(dest + written, _UT("/"),
1 * sizeof(URI_CHAR));
written += 1;
} else {
dest[0] = _UT('\0');
if (charsWritten != NULL) {
*charsWritten = 0;
}
return URI_ERROR_TOSTRING_TOO_LONG;
}
} else {
(*charsRequired) += 1;
}
}
walker = walker->next;
} while (walker != NULL);
}
/* [11/19] if defined(query) then */
if (uri->query.first != NULL) {
/* [12/19] append "?" to result; */
if (dest != NULL) {
if (written + 1 <= maxChars) {
memcpy(dest + written, _UT("?"),
1 * sizeof(URI_CHAR));
written += 1;
} else {
dest[0] = _UT('\0');
if (charsWritten != NULL) {
*charsWritten = 0;
}
return URI_ERROR_TOSTRING_TOO_LONG;
}
} else {
(*charsRequired) += 1;
}
/* [13/19] append query to result; */
{
const int charsToWrite
= (int)(uri->query.afterLast - uri->query.first);
if (dest != NULL) {
if (written + charsToWrite <= maxChars) {
memcpy(dest + written, uri->query.first,
charsToWrite * sizeof(URI_CHAR));
written += charsToWrite;
} else {
dest[0] = _UT('\0');
if (charsWritten != NULL) {
*charsWritten = 0;
}
return URI_ERROR_TOSTRING_TOO_LONG;
}
} else {
(*charsRequired) += charsToWrite;
}
}
/* [14/19] endif; */
}
/* [15/19] if defined(fragment) then */
if (uri->fragment.first != NULL) {
/* [16/19] append "#" to result; */
if (dest != NULL) {
if (written + 1 <= maxChars) {
memcpy(dest + written, _UT("#"),
1 * sizeof(URI_CHAR));
written += 1;
} else {
dest[0] = _UT('\0');
if (charsWritten != NULL) {
*charsWritten = 0;
}
return URI_ERROR_TOSTRING_TOO_LONG;
}
} else {
(*charsRequired) += 1;
}
/* [17/19] append fragment to result; */
{
const int charsToWrite
= (int)(uri->fragment.afterLast - uri->fragment.first);
if (dest != NULL) {
if (written + charsToWrite <= maxChars) {
memcpy(dest + written, uri->fragment.first,
charsToWrite * sizeof(URI_CHAR));
written += charsToWrite;
} else {
dest[0] = _UT('\0');
if (charsWritten != NULL) {
*charsWritten = 0;
}
return URI_ERROR_TOSTRING_TOO_LONG;
}
} else {
(*charsRequired) += charsToWrite;
}
}
/* [18/19] endif; */
}
/* [19/19] return result; */
if (dest != NULL) {
dest[written++] = _UT('\0');
if (charsWritten != NULL) {
*charsWritten = written;
}
}
return URI_SUCCESS;
}
#endif

View File

@ -0,0 +1,255 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
* Copyright (C) 2007, Sebastian Pipping <webmaster@hartwork.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the <ORGANIZATION> nor the names of its
* contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* What encodings are enabled? */
#include "UriDefsConfig.h"
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
# define URI_PASS_ANSI 1
# include "UriResolve.c"
# undef URI_PASS_ANSI
# define URI_PASS_UNICODE 1
# include "UriResolve.c"
# undef URI_PASS_UNICODE
#else
# ifdef URI_PASS_ANSI
# include "UriDefsAnsi.h"
# else
# include "UriDefsUnicode.h"
# include <wchar.h>
# endif
#ifndef URI_DOXYGEN
# include "Uri.h"
# include "UriCommon.h"
#endif
/* Merges the path of relAppend into the path of absWork: the last path
 * segment of absWork (created first if absWork has no path yet) takes
 * over the text of relAppend's first segment, and a newly allocated copy
 * of every further relAppend segment is linked in behind it. Only the
 * text ranges are copied, not the characters they point to.
 * Returns URI_FALSE if an allocation fails; the chain built so far stays
 * properly terminated in that case. */
static URI_INLINE UriBool URI_FUNC(MergePath)(URI_TYPE(Uri) * absWork,
		const URI_TYPE(Uri) * relAppend) {
	const URI_TYPE(PathSegment) * source;
	URI_TYPE(PathSegment) * tail;

	/* Nothing to merge in */
	if (relAppend->pathHead == NULL) {
		return URI_TRUE;
	}

	/* Make sure there is a last segment that can be replaced */
	if (absWork->pathHead == NULL) {
		URI_TYPE(PathSegment) * const fresh = malloc(sizeof(URI_TYPE(PathSegment)));
		if (fresh == NULL) {
			return URI_FALSE; /* Raises malloc error */
		}
		fresh->next = NULL;
		absWork->pathHead = fresh;
		absWork->pathTail = fresh;
	}

	/* Last segment ("" if trailing slash) becomes first of append chain */
	absWork->pathTail->text.first = relAppend->pathHead->text.first;
	absWork->pathTail->text.afterLast = relAppend->pathHead->text.afterLast;

	/* Duplicate the remaining segments one by one */
	tail = absWork->pathTail;
	for (source = relAppend->pathHead->next; source != NULL;
			source = source->next) {
		URI_TYPE(PathSegment) * const copy = malloc(sizeof(URI_TYPE(PathSegment)));
		if (copy == NULL) {
			/* Leave a terminated chain behind */
			tail->next = NULL;
			absWork->pathTail = tail;
			return URI_FALSE; /* Raises malloc error */
		}
		copy->text = source->text;
		copy->next = NULL;
		tail->next = copy;
		tail = copy;
	}
	absWork->pathTail = tail;

	return URI_TRUE;
}
/*
 * Resolves the reference relSource against the base URI absBase and
 * stores the result in absDest. The numbered comments track the 32 steps
 * of the reference resolution pseudocode; T is absDest, R is relSource,
 * Base is absBase.
 *
 * Returns URI_SUCCESS, URI_ERROR_NULL if any argument is NULL,
 * URI_ERROR_ADDBASE_REL_BASE if absBase has no scheme, or
 * URI_ERROR_MALLOC if copying or normalizing a component fails.
 *
 * On failure absDest may be partially filled; nothing is freed here
 * (the AddBaseUri wrapper frees absDest's members in that case).
 */
static int URI_FUNC(AddBaseUriImpl)(URI_TYPE(Uri) * absDest,
const URI_TYPE(Uri) * relSource,
const URI_TYPE(Uri) * absBase) {
if (absDest == NULL) {
return URI_ERROR_NULL;
}
/* absDest is reset before the other arguments are checked, so it is in
 * a defined state on every later return */
URI_FUNC(ResetUri)(absDest);
if ((relSource == NULL) || (absBase == NULL)) {
return URI_ERROR_NULL;
}
/* absBase absolute? */
if (absBase->scheme.first == NULL) {
return URI_ERROR_ADDBASE_REL_BASE;
}
/* [01/32] if defined(R.scheme) then */
if (relSource->scheme.first != NULL) {
/* [02/32] T.scheme = R.scheme; */
absDest->scheme = relSource->scheme;
/* [03/32] T.authority = R.authority; */
if (!URI_FUNC(CopyAuthority)(absDest, relSource)) {
return URI_ERROR_MALLOC;
}
/* [04/32] T.path = remove_dot_segments(R.path); */
if (!URI_FUNC(CopyPath)(absDest, relSource)) {
return URI_ERROR_MALLOC;
}
if (!URI_FUNC(RemoveDotSegmentsAbsolute)(absDest)) {
return URI_ERROR_MALLOC;
}
/* [05/32] T.query = R.query; */
absDest->query = relSource->query;
/* [06/32] else */
} else {
/* [07/32] if defined(R.authority) then */
if (URI_FUNC(IsHostSet)(relSource)) {
/* [08/32] T.authority = R.authority; */
if (!URI_FUNC(CopyAuthority)(absDest, relSource)) {
return URI_ERROR_MALLOC;
}
/* [09/32] T.path = remove_dot_segments(R.path); */
if (!URI_FUNC(CopyPath)(absDest, relSource)) {
return URI_ERROR_MALLOC;
}
if (!URI_FUNC(RemoveDotSegmentsAbsolute)(absDest)) {
return URI_ERROR_MALLOC;
}
/* [10/32] T.query = R.query; */
absDest->query = relSource->query;
/* [11/32] else */
} else {
/* Step 28 is carried out ahead of steps 12 to 27 here */
/* [28/32] T.authority = Base.authority; */
if (!URI_FUNC(CopyAuthority)(absDest, absBase)) {
return URI_ERROR_MALLOC;
}
/* [12/32] if (R.path == "") then */
if (relSource->pathHead == NULL) {
/* [13/32] T.path = Base.path; */
if (!URI_FUNC(CopyPath)(absDest, absBase)) {
return URI_ERROR_MALLOC;
}
/* [14/32] if defined(R.query) then */
if (relSource->query.first != NULL) {
/* [15/32] T.query = R.query; */
absDest->query = relSource->query;
/* [16/32] else */
} else {
/* [17/32] T.query = Base.query; */
absDest->query = absBase->query;
/* [18/32] endif; */
}
/* [19/32] else */
} else {
/* [20/32] if (R.path starts-with "/") then */
if (relSource->absolutePath) {
/* [21/32] T.path = remove_dot_segments(R.path); */
if (!URI_FUNC(CopyPath)(absDest, relSource)) {
return URI_ERROR_MALLOC;
}
if (!URI_FUNC(RemoveDotSegmentsAbsolute)(absDest)) {
return URI_ERROR_MALLOC;
}
/* [22/32] else */
} else {
/* [23/32] T.path = merge(Base.path, R.path); */
/* The merge works on a copy of the base path held by absDest */
if (!URI_FUNC(CopyPath)(absDest, absBase)) {
return URI_ERROR_MALLOC;
}
if (!URI_FUNC(MergePath)(absDest, relSource)) {
return URI_ERROR_MALLOC;
}
/* [24/32] T.path = remove_dot_segments(T.path); */
if (!URI_FUNC(RemoveDotSegmentsAbsolute)(absDest)) {
return URI_ERROR_MALLOC;
}
if (!URI_FUNC(FixAmbiguity)(absDest)) {
return URI_ERROR_MALLOC;
}
/* [25/32] endif; */
}
/* [26/32] T.query = R.query; */
absDest->query = relSource->query;
/* [27/32] endif; */
}
URI_FUNC(FixEmptyTrailSegment)(absDest);
/* [29/32] endif; */
}
/* [30/32] T.scheme = Base.scheme; */
absDest->scheme = absBase->scheme;
/* [31/32] endif; */
}
/* [32/32] T.fragment = R.fragment; */
absDest->fragment = relSource->fragment;
return URI_SUCCESS;
}
/* Resolves relSource against absBase into absDest via AddBaseUriImpl.
 * If that fails, whatever was already put into absDest is released
 * before the error code is handed on to the caller. */
int URI_FUNC(AddBaseUri)(URI_TYPE(Uri) * absDest,
		const URI_TYPE(Uri) * relSource, const URI_TYPE(Uri) * absBase) {
	const int status = URI_FUNC(AddBaseUriImpl)(absDest, relSource, absBase);

	if (status == URI_SUCCESS) {
		return status;
	}

	/* Clean up the partial result, if there is a destination at all */
	if (absDest != NULL) {
		URI_FUNC(FreeUriMembers)(absDest);
	}
	return status;
}
#endif

View File

@ -0,0 +1,316 @@
/*
* uriparser - RFC 3986 URI parsing library
*
* Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
* Copyright (C) 2007, Sebastian Pipping <webmaster@hartwork.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the <ORGANIZATION> nor the names of its
* contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* What encodings are enabled? */
#include "UriDefsConfig.h"
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
# define URI_PASS_ANSI 1
# include "UriShorten.c"
# undef URI_PASS_ANSI
# define URI_PASS_UNICODE 1
# include "UriShorten.c"
# undef URI_PASS_UNICODE
#else
# ifdef URI_PASS_ANSI
# include "UriDefsAnsi.h"
# else
# include "UriDefsUnicode.h"
# include <wchar.h>
# endif
#ifndef URI_DOXYGEN
# include "Uri.h"
# include "UriCommon.h"
#endif
/* Appends a new path segment covering [first, afterLast) to the end of
 * the path of uri. The characters are referenced, not copied.
 * Returns URI_FALSE if the segment cannot be allocated. */
static URI_INLINE UriBool URI_FUNC(AppendSegment)(URI_TYPE(Uri) * uri,
		const URI_CHAR * first, const URI_CHAR * afterLast) {
	URI_TYPE(PathSegment) * const node = malloc(1 * sizeof(URI_TYPE(PathSegment)));

	if (node == NULL) {
		return URI_FALSE; /* Raises malloc error */
	}
	node->text.first = first;
	node->text.afterLast = afterLast;
	node->next = NULL;

	/* Link in: behind the current tail, or as head of an empty path */
	if (uri->pathTail != NULL) {
		uri->pathTail->next = node;
	} else {
		uri->pathHead = node;
	}
	uri->pathTail = node;

	return URI_TRUE;
}
/* Tells whether the hosts of <first> and <second> are the same.
 *
 * Only the host is examined (IPv4, IPv6, IPvFuture or registered name, in
 * that order, decided by what <first> carries); user info and port are not
 * looked at by this function.
 *
 * Textual hosts are equal only if they have the same length AND the same
 * characters. Comparing with the length of <first> alone would make a host
 * that is a strict prefix of the other one ("example.com" versus
 * "example.com.evil") compare equal.
 *
 * Returns URI_TRUE if equal, URI_FALSE otherwise. */
static URI_INLINE UriBool URI_FUNC(EqualsAuthority)(const URI_TYPE(Uri) * first,
		const URI_TYPE(Uri) * second) {
	/* IPv4 */
	if (first->hostData.ip4 != NULL) {
		return ((second->hostData.ip4 != NULL)
				&& !memcmp(first->hostData.ip4->data,
					second->hostData.ip4->data, 4)) ? URI_TRUE : URI_FALSE;
	}

	/* IPv6 */
	if (first->hostData.ip6 != NULL) {
		return ((second->hostData.ip6 != NULL)
				&& !memcmp(first->hostData.ip6->data,
					second->hostData.ip6->data, 16)) ? URI_TRUE : URI_FALSE;
	}

	/* IPvFuture */
	if (first->hostData.ipFuture.first != NULL) {
		return ((second->hostData.ipFuture.first != NULL)
				/* same length, otherwise a prefix would match */
				&& ((first->hostData.ipFuture.afterLast
						- first->hostData.ipFuture.first)
					== (second->hostData.ipFuture.afterLast
						- second->hostData.ipFuture.first))
				&& !URI_STRNCMP(first->hostData.ipFuture.first,
					second->hostData.ipFuture.first,
					first->hostData.ipFuture.afterLast
					- first->hostData.ipFuture.first))
				? URI_TRUE : URI_FALSE;
	}

	/* Registered name */
	if (first->hostText.first != NULL) {
		return ((second->hostText.first != NULL)
				/* same length, otherwise a prefix would match */
				&& ((first->hostText.afterLast - first->hostText.first)
					== (second->hostText.afterLast - second->hostText.first))
				&& !URI_STRNCMP(first->hostText.first,
					second->hostText.first,
					first->hostText.afterLast
					- first->hostText.first)) ? URI_TRUE : URI_FALSE;
	}

	/* <first> has no host at all: equal only if <second> has none either */
	return (second->hostText.first == NULL) ? URI_TRUE : URI_FALSE;
}
/* Computes in <dest> a reference that, resolved against <absBase>, yields
 * <absSource> (the numbered comments below follow the 50-step algorithm
 * the code implements).
 *
 * <dest> is reset first. On error it may be left partially filled; this
 * function does not free it.
 * Scheme, query and fragment ranges of <dest> are shallow copies of the
 * ranges of <absSource>; appended path segments reference the text of
 * <absSource> as well.
 *
 * Scheme and path segments are considered equal only when both length and
 * characters match. Comparing with the length of the source side alone
 * would treat "http" as equal to "https" and segment "a" as equal to "ab".
 *
 * Returns:
 *   URI_SUCCESS                      on success
 *   URI_ERROR_NULL                   if any pointer argument is NULL
 *   URI_ERROR_REMOVEBASE_REL_BASE    if <absBase> has no scheme
 *   URI_ERROR_REMOVEBASE_REL_SOURCE  if <absSource> has no scheme
 *   URI_ERROR_MALLOC                 if a copy or segment allocation fails
 */
int URI_FUNC(RemoveBaseUriImpl)(URI_TYPE(Uri) * dest,
		const URI_TYPE(Uri) * absSource,
		const URI_TYPE(Uri) * absBase,
		UriBool domainRootMode) {
	if (dest == NULL) {
		return URI_ERROR_NULL;
	}
	URI_FUNC(ResetUri)(dest);

	if ((absSource == NULL) || (absBase == NULL)) {
		return URI_ERROR_NULL;
	}

	/* absBase absolute? */
	if (absBase->scheme.first == NULL) {
		return URI_ERROR_REMOVEBASE_REL_BASE;
	}

	/* absSource absolute? */
	if (absSource->scheme.first == NULL) {
		return URI_ERROR_REMOVEBASE_REL_SOURCE;
	}

	/* [01/50]	if (A.scheme != Base.scheme) then */
	if (((absSource->scheme.afterLast - absSource->scheme.first)
				!= (absBase->scheme.afterLast - absBase->scheme.first))
			|| URI_STRNCMP(absSource->scheme.first, absBase->scheme.first,
				absSource->scheme.afterLast - absSource->scheme.first)) {
		/* [02/50]	   T.scheme = A.scheme; */
		dest->scheme = absSource->scheme;
		/* [03/50]	   T.authority = A.authority; */
		if (!URI_FUNC(CopyAuthority)(dest, absSource)) {
			return URI_ERROR_MALLOC;
		}
		/* [04/50]	   T.path = A.path; */
		if (!URI_FUNC(CopyPath)(dest, absSource)) {
			return URI_ERROR_MALLOC;
		}
	/* [05/50]	else */
	} else {
		/* [06/50]	   undef(T.scheme); */
		/* NOOP */
		/* [07/50]	   if (A.authority != Base.authority) then */
		if (!URI_FUNC(EqualsAuthority)(absSource, absBase)) {
			/* [08/50]	      T.authority = A.authority; */
			if (!URI_FUNC(CopyAuthority)(dest, absSource)) {
				return URI_ERROR_MALLOC;
			}
			/* [09/50]	      T.path = A.path; */
			if (!URI_FUNC(CopyPath)(dest, absSource)) {
				return URI_ERROR_MALLOC;
			}
		/* [10/50]	   else */
		} else {
			/* [11/50]	      if domainRootMode then */
			if (domainRootMode == URI_TRUE) {
				/* [12/50]	         undef(T.authority); */
				/* NOOP */
				/* [13/50]	         if (first(A.path) == "") then */
				/* GROUPED */
				/* [14/50]	            T.path   = "/." + A.path; */
				/* GROUPED */
				/* [15/50]	         else */
				/* GROUPED */
				/* [16/50]	            T.path   = A.path; */
				/* GROUPED */
				/* [17/50]	         endif; */
				if (!URI_FUNC(CopyPath)(dest, absSource)) {
					return URI_ERROR_MALLOC;
				}
				dest->absolutePath = URI_TRUE;

				if (!URI_FUNC(FixAmbiguity)(dest)) {
					return URI_ERROR_MALLOC;
				}
			/* [18/50]	      else */
			} else {
				const URI_TYPE(PathSegment) * sourceSeg = absSource->pathHead;
				const URI_TYPE(PathSegment) * baseSeg = absBase->pathHead;
				/* [19/50]	         bool pathNaked = true; */
				UriBool pathNaked = URI_TRUE;
				/* [20/50]	         undef(last(Base.path)); */
				/* NOOP */
				/* [21/50]	         T.path = ""; */
				dest->absolutePath = URI_FALSE;
				/* [22/50]	         while (first(A.path) == first(Base.path)) do */
				while ((sourceSeg != NULL) && (baseSeg != NULL)
						/* segments must have the same length ... */
						&& ((sourceSeg->text.afterLast - sourceSeg->text.first)
							== (baseSeg->text.afterLast - baseSeg->text.first))
						/* ... and the same characters */
						&& !URI_STRNCMP(sourceSeg->text.first, baseSeg->text.first,
							sourceSeg->text.afterLast - sourceSeg->text.first)
						&& !((sourceSeg->text.first == sourceSeg->text.afterLast)
							&& ((sourceSeg->next == NULL) != (baseSeg->next == NULL)))) {
					/* [23/50]	            A.path++; */
					sourceSeg = sourceSeg->next;
					/* [24/50]	            Base.path++; */
					baseSeg = baseSeg->next;
				/* [25/50]	         endwhile; */
				}
				/* [26/50]	         while defined(first(Base.path)) do */
				while ((baseSeg != NULL) && (baseSeg->next != NULL)) {
					/* [27/50]	            Base.path++; */
					baseSeg = baseSeg->next;
					/* [28/50]	            T.path += "../"; */
					if (!URI_FUNC(AppendSegment)(dest, URI_FUNC(ConstParent),
							URI_FUNC(ConstParent) + 2)) {
						return URI_ERROR_MALLOC;
					}
					/* [29/50]	            pathNaked = false; */
					pathNaked = URI_FALSE;
				/* [30/50]	         endwhile; */
				}
				/* [31/50]	         while defined(first(A.path)) do */
				while (sourceSeg != NULL) {
					/* [32/50]	            if pathNaked then */
					if (pathNaked == URI_TRUE) {
						/* [33/50]	               if (first(A.path) contains ":") then */
						UriBool containsColon = URI_FALSE;
						const URI_CHAR * ch = sourceSeg->text.first;
						for (; ch < sourceSeg->text.afterLast; ch++) {
							if (*ch == _UT(':')) {
								containsColon = URI_TRUE;
								break;
							}
						}

						if (containsColon) {
							/* [34/50]	                  T.path += "./"; */
							if (!URI_FUNC(AppendSegment)(dest, URI_FUNC(ConstPwd),
									URI_FUNC(ConstPwd) + 1)) {
								return URI_ERROR_MALLOC;
							}
						/* [35/50]	               elseif (first(A.path) == "") then */
						} else if (sourceSeg->text.first == sourceSeg->text.afterLast) {
							/* [36/50]	                  T.path += "/."; */
							if (!URI_FUNC(AppendSegment)(dest, URI_FUNC(ConstPwd),
									URI_FUNC(ConstPwd) + 1)) {
								return URI_ERROR_MALLOC;
							}
						/* [37/50]	               endif; */
						}
					/* [38/50]	            endif; */
					}
					/* [39/50]	            T.path += first(A.path); */
					if (!URI_FUNC(AppendSegment)(dest, sourceSeg->text.first,
							sourceSeg->text.afterLast)) {
						return URI_ERROR_MALLOC;
					}
					/* [40/50]	            pathNaked = false; */
					pathNaked = URI_FALSE;
					/* [41/50]	            A.path++; */
					sourceSeg = sourceSeg->next;
					/* [42/50]	            if defined(first(A.path)) then */
					/* NOOP */
					/* [43/50]	               T.path += + "/"; */
					/* NOOP */
					/* [44/50]	            endif; */
					/* NOOP */
				/* [45/50]	         endwhile; */
				}
			/* [46/50]	      endif; */
			}
		/* [47/50]	   endif; */
		}
	/* [48/50]	endif; */
	}
	/* [49/50]	T.query = A.query; */
	dest->query = absSource->query;
	/* [50/50]	T.fragment = A.fragment; */
	dest->fragment = absSource->fragment;

	return URI_SUCCESS;
}
/* Public entry point: runs RemoveBaseUriImpl and, if that fails, releases
 * whatever was already stored in <dest> (when <dest> is not NULL).
 * The status code of the implementation is returned unchanged. */
int URI_FUNC(RemoveBaseUri)(URI_TYPE(Uri) * dest,
		const URI_TYPE(Uri) * absSource,
		const URI_TYPE(Uri) * absBase,
		UriBool domainRootMode) {
	const int status = URI_FUNC(RemoveBaseUriImpl)(dest, absSource,
			absBase, domainRootMode);

	if (status == URI_SUCCESS) {
		return status;
	}

	/* Failure: do not hand a half-built URI back to the caller */
	if (dest != NULL) {
		URI_FUNC(FreeUriMembers)(dest);
	}
	return status;
}
#endif

View File

@ -0,0 +1,273 @@
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include "uriparser/Uri.h"
#include "uriparser2.h"
/* Copies the first n bytes of src to dst and terminates dst with a nul byte.
 * dst must have room for n + 1 bytes. Returns dst. */
static char *memcpyz(char *dst, const char *src, int n) {
	char *terminator = (char *) memcpy(dst, src, n) + n;
	*terminator = '\0';
	return dst;
}
/* Number of chars needed to store the range as a string, nul byte included.
 * An unset range (first == NULL) or an empty one needs no storage: 0. */
static int range_size(const UriTextRangeA *r) {
	if (!r->first || r->first == r->afterLast) {
		return 0;
	}
	return (r->afterLast - r->first) + 1;
}
/* Returns the number of chars required to store the path as a single
 * "/seg/seg..." string, including the nul byte; 0 if there is no path.
 *
 * Every segment accounts for its leading slash plus its text. Empty segments
 * (for instance the one that represents a trailing "/") are counted too, so
 * a path ending in a slash still has room for both the slash and the nul
 * byte. */
static int path_size(const UriPathSegmentA *ps) {
	int size;

	if (ps == 0) {
		return 0;
	}

	size = 1; /* terminating nul byte */
	for (; ps != 0; ps = ps->next) {
		size += 1; /* leading slash of this segment */
		if (ps->text.first) {
			size += (int) (ps->text.afterLast - ps->text.first);
		}
	}
	return size;
}
/* Size in bytes of the string buffer that holds all components of uu. */
static int uri_size(const UriUriA *uu) {
	int total = 0;

	total += range_size(&uu->scheme);
	total += range_size(&uu->userInfo) + 1; /* userinfo will be split on : */
	total += range_size(&uu->hostText);
	total += path_size(uu->pathHead);
	total += range_size(&uu->query);
	total += range_size(&uu->fragment);

	return total;
}
/* Copies the text of r to *buffer as a nul-terminated string and advances
 * *buffer past the terminator. Returns the start of the copy, or NULL
 * (leaving *buffer alone) when the range is empty. */
static const char *copy_range(const UriTextRangeA *r, char **buffer) {
	const int len = r->afterLast - r->first;
	char *out;

	if (len == 0) {
		return 0;
	}

	out = *buffer;
	memcpy(out, r->first, len);
	out[len] = '\0';
	*buffer = out + len + 1;
	return out;
}
/* Writes the path segments starting at ps to *buffer as one nul-terminated
 * "/seg/seg..." string and advances *buffer past the terminator.
 *
 * Returns the start of the string, or NULL (leaving *buffer alone) when
 * there is no path at all, so that an absent path is reported like every
 * other absent component instead of pointing at unused buffer space.
 *
 * The string is always nul-terminated, also when the last segment is empty
 * (a path ending in "/"); without the terminator the path would run into
 * whatever component is copied to the buffer next.
 *
 * Empty segments that are followed by another segment are not emitted
 * ("/a//b" is stored as "/a/b"), which matches what this function has
 * always produced. */
static const char *copy_path(const UriPathSegmentA *ps, char **buffer) {
	const char *s = *buffer;

	if (ps == 0) {
		return 0;
	}

	for (; ps != 0; ps = ps->next) {
		const int size = ps->text.afterLast - ps->text.first;

		if (size == 0 && ps->next) {
			/* empty interior segment: contributes nothing */
			continue;
		}

		**buffer = '/'; (*buffer)++;
		if (size > 0) {
			memcpy(*buffer, ps->text.first, size);
			*buffer += size;
		}
	}

	/* single terminator for the whole path */
	**buffer = '\0'; (*buffer)++;

	return s;
}
/* Converts the decimal digits at the start of [first, after_last) to an int.
 *
 * Returns 0 for an empty range or when the range does not start with a
 * digit. Conversion stops at the first non-digit. Values too large for an
 * int saturate at the largest int instead of overflowing.
 *
 * The digits are read in place: no temporary copy is made, so the length of
 * the input (which comes straight from the parsed URI text) cannot exhaust
 * the stack. */
static int parse_int(const char *first, const char *after_last) {
	/* largest int, spelled without <limits.h> */
	const int int_max = (int) (~0u >> 1);
	const char *p;
	int value = 0;

	for (p = first; p < after_last && *p >= '0' && *p <= '9'; p++) {
		const int digit = *p - '0';
		if (value > (int_max - digit) / 10) {
			return int_max; /* would overflow: saturate */
		}
		value = value * 10 + digit;
	}
	return value;
}
/* Splits the user info range r at the first ':' into user and password,
 * copies each non-empty part to *buffer as a nul-terminated string and
 * advances *buffer accordingly. Parts that are absent or empty are left
 * as NULL in uri. */
static void parse_user_info(URI *uri, const UriTextRangeA *r, char **buffer) {
	const int total = r->afterLast - r->first;
	const char *sep;
	const char *pass_begin;
	int user_len;
	int pass_len;

	uri->user = 0;
	uri->pass = 0;

	if (total == 0) {
		return;
	}

	sep = memchr(r->first, ':', total);
	if (sep) {
		user_len = sep - r->first;
		pass_begin = sep + 1;
	} else {
		/* no colon: everything is the user name */
		user_len = r->afterLast - r->first;
		pass_begin = r->afterLast;
	}
	pass_len = r->afterLast - pass_begin;

	if (user_len) {
		uri->user = memcpyz(*buffer, r->first, user_len);
		*buffer += user_len + 1;
	}
	if (pass_len) {
		uri->pass = memcpyz(*buffer, pass_begin, pass_len);
		*buffer += pass_len + 1;
	}
}
/* Fills uri from the parse result uu. All strings are copied back to back
 * into buffer in this order: scheme, host, path, query, fragment, user,
 * password. The port is converted to a number and needs no buffer space. */
static void init_uri(const UriUriA *uu, URI *uri, char *buffer) {
	char *cursor = buffer;

	uri->user = 0;
	uri->pass = 0;
	uri->port = parse_int(uu->portText.first, uu->portText.afterLast);

	uri->scheme = copy_range(&uu->scheme, &cursor);
	uri->host = copy_range(&uu->hostText, &cursor);
	uri->path = copy_path(uu->pathHead, &cursor);
	uri->query = copy_range(&uu->query, &cursor);
	uri->fragment = copy_range(&uu->fragment, &cursor);

	parse_user_info(uri, &uu->userInfo, &cursor);
}
/* Parses input into a freshly allocated URI. The component strings live in
 * the same allocation, directly behind the URI object, so one free() call
 * releases everything. Returns NULL if parsing or the allocation fails; in
 * the latter case errno is ENOMEM. */
URI *uri_parse(const char *input) {
	UriParserStateA state;
	UriUriA uu;
	URI *uri = 0;
	int saved_errno;

	state.uri = &uu;
	if (uriParseUriA(&state, input) == URI_SUCCESS) {
		uri = calloc(1, sizeof(*uri) + uri_size(&uu));
		if (uri == 0) {
			/* work around non-conformant malloc() implementations */
			errno = ENOMEM;
		} else {
			init_uri(&uu, uri, (char *) (uri + 1));
		}
	}

	/* releasing the parse result must not disturb errno */
	saved_errno = errno;
	uriFreeUriMembersA(&uu);
	errno = saved_errno;

	return uri;
}
/* Helper for the C++ constructor: parses input into the caller-provided uri
 * and stores the component strings in a separately malloc()'ed buffer.
 *
 * Returns that buffer (the caller owns it and must free() it), or NULL if
 * parsing or the allocation fails; in the latter case errno is ENOMEM.
 *
 * All component fields of uri are cleared first, so that after a failure
 * uri describes an empty URI (NULL strings, port 0) rather than holding
 * whatever happened to be in memory. */
void *uri_parse2(const char *input, URI *uri) {
	UriParserStateA state;
	UriUriA uu;
	char *buffer = 0;
	int saved_errno;

	uri->scheme = 0;
	uri->user = 0;
	uri->pass = 0;
	uri->host = 0;
	uri->port = 0;
	uri->path = 0;
	uri->query = 0;
	uri->fragment = 0;

	state.uri = &uu;
	if (URI_SUCCESS == uriParseUriA(&state, input)) {
		buffer = malloc(uri_size(&uu));
		if (buffer) {
			init_uri(&uu, uri, buffer);
		} else {
			/* work around non-conformant malloc() implementations */
			errno = ENOMEM;
		}
	}

	/* releasing the parse result must not disturb errno */
	saved_errno = errno;
	uriFreeUriMembersA(&uu);
	errno = saved_errno;

	return buffer;
}
/* Copies the characters of src (without its nul byte) to dst and returns
 * the position in dst right behind the copied characters. */
static char *append(char *dst, const char *src) {
	const int len = strlen(src);
	return (char *) memcpy(dst, src, len) + len;
}
/* Returns the number of decimal digits of n; 0 when n is zero or negative. */
static int power_of_10(int n) {
	int digits = 0;
	while (n > 0) {
		n /= 10;
		digits++;
	}
	return digits;
}
/* Builds the string form of uri:
 *
 *   scheme "://" user ":" pass "@" host ":" port path "?" query "#" fragment
 *
 * Components that are NULL (0 for the port) are left out together with
 * their delimiter. Returns a malloc()'ed string that the caller must
 * free(), or NULL if the allocation fails.
 *
 * The size computation below mirrors the write phase piece by piece. In
 * particular the ":" in front of the password and the "@" behind the user
 * info are counted separately, because a password without a user name
 * still produces both characters. */
char *uri_build(const URI *uri) {
	int size = 0;

	if (uri->scheme) {
		size += strlen(uri->scheme) + 3;	/* "://" */
	}
	if (uri->user) {
		size += strlen(uri->user);
	}
	if (uri->pass) {
		size += 1 + strlen(uri->pass);	/* ":" pass */
	}
	if (uri->user || uri->pass) {
		size += 1;	/* "@" */
	}
	if (uri->host) {
		size += strlen(uri->host);
	}
	if (uri->port) {
		size += 1 + power_of_10(uri->port);	/* ":" port */
	}
	if (uri->path) {
		size += strlen(uri->path);
	}
	if (uri->query) {
		size += 1 + strlen(uri->query);	/* "?" query */
	}
	if (uri->fragment) {
		size += 1 + strlen(uri->fragment);	/* "#" fragment */
	}

	char *s = malloc(size + 1);	/* + 1 for the nul byte */
	if (s) {
		char *p = s;
		if (uri->scheme) {
			p = append(p, uri->scheme);
			*p++ = ':';
			*p++ = '/';
			*p++ = '/';
		}
		if (uri->user) {
			p = append(p, uri->user);
		}
		if (uri->pass) {
			*p++ = ':';
			p = append(p, uri->pass);
		}
		if (uri->user || uri->pass) {
			*p++ = '@';
		}
		if (uri->host) {
			p = append(p, uri->host);
		}
		if (uri->port) {
			p += sprintf(p, ":%d", uri->port);
		}
		if (uri->path) {
			p = append(p, uri->path);
		}
		if (uri->query) {
			*p++ = '?';
			p = append(p, uri->query);
		}
		if (uri->fragment) {
			*p++ = '#';
			p = append(p, uri->fragment);
		}
		*p = '\0';
	}
	return s;
}
/* NULL-safe string comparison. a < b if a is NULL and b is not (and vice versa). */
#define COMPARE(a, b) \
if (a && b) { \
int n = strcmp(a, b); \
if (n) return n; \
} else if (a || b) { \
return a ? 1 : -1; \
}
int uri_compare(const URI *a, const URI *b) {
COMPARE(a->scheme, b->scheme);
COMPARE(a->host, b->host);
if (a->port != b->port) {
return a->port > b->port ? 1 : -1;
}
COMPARE(a->path, b->path);
COMPARE(a->query, b->query);
COMPARE(a->fragment, b->fragment);
COMPARE(a->user, b->user);
COMPARE(a->pass, b->pass);
return 0;
}

View File

@ -0,0 +1,101 @@
#ifndef URIPARSER2_H_
#define URIPARSER2_H_
#ifdef __cplusplus
#include <new>
#include <string>
#endif
/**
* URI object. After the call to uri_parse() fields will be NULL (0 for the port) if their component was absent in the input string.
*
* The struct is shared between C and C++. The data fields come first and are
* the same in both languages; the C++ view adds one trailing member and the
* member functions.
*/
typedef struct URI {
	const char *scheme;   /* scheme, stored without the "://" delimiter */
	const char *user;     /* part of the user info before the first ':' */
	const char *pass;     /* part of the user info after the first ':' */
	const char *host;     /* host text */
	unsigned short port;  /* numeric port, 0 if none was given */
	const char *path;     /* path segments joined with '/', with a leading '/' */
	const char *query;    /* query, stored without the '?' delimiter */
	const char *fragment; /* fragment, stored without the '#' delimiter */
#ifdef __cplusplus
	/* Buffer returned by uri_parse2() (NULL if nothing was parsed); the
	 * destructor passes it to free().
	 * NOTE(review): no copy constructor is declared here, so a copied URI
	 * would carry the same pointer and free it a second time -- presumably
	 * objects are meant to be passed by reference only; confirm. */
	const void *const reserved;
	/* Parses uri if it is not NULL. */
	URI(const char *uri = 0);
	~URI();
	/* All comparisons are defined in terms of uri_compare(). */
	bool operator<(const URI& uri) const;
	bool operator>(const URI& uri) const;
	bool operator<=(const URI& uri) const;
	bool operator>=(const URI& uri) const;
	bool operator==(const URI& uri) const;
	bool operator!=(const URI& uri) const;
	/* String form of this URI as produced by uri_build(). */
	std::string to_string() const;
#endif
} URI;
#ifdef __cplusplus
#include <ostream>
#include <cstdlib>
/* Parses uri into target; returns the malloc()'ed buffer that holds the
 * component strings, or NULL if parsing or the allocation failed. */
extern "C" void *uri_parse2(const char *uri, URI *target);
/* Returns the malloc()'ed string form of uri, or NULL if the allocation failed. */
extern "C" char *uri_build(const URI *uri);
/* Compares a and b component by component; 0 means equal. */
extern "C" int uri_compare(const URI *a, const URI *b);
/* Constructs a URI from the given string, or an empty URI if uri is NULL.
 *
 * Every data member is zero-initialized before parsing starts, so a
 * default-constructed object and an object whose input could not be parsed
 * both have NULL components and port 0 instead of indeterminate values.
 * Members are initialized in declaration order; 'reserved' is declared last,
 * which means uri_parse2() runs after the other members have been cleared
 * and its results are not overwritten. */
inline URI::URI(const char* uri)
	: scheme(0), user(0), pass(0), host(0), port(0),
	  path(0), query(0), fragment(0),
	  reserved(uri ? uri_parse2(uri, this) : 0) {
}
/* Releases the buffer that holds the component strings (may be NULL). */
inline URI::~URI() {
	void *buffer = const_cast<void *>(reserved);
	free(buffer);
}
/* Returns the string form of this URI as produced by uri_build().
 *
 * Throws std::bad_alloc if uri_build() cannot allocate its result (it
 * returns NULL in that case, which must not be passed to std::string), or
 * if copying the result into the std::string fails. The C string is
 * released on every path. */
inline std::string URI::to_string() const {
	char *s = uri_build(this);
	if (!s) {
		throw std::bad_alloc();
	}

	std::string rv;
	try {
		rv.assign(s);
	} catch (...) {
		free(s);
		throw;
	}
	free(s);
	return rv;
}
/* Relational operators: each one maps the sign of uri_compare(this, &uri)
 * to the corresponding boolean. */
inline bool URI::operator<(const URI& uri) const {
	const int order = uri_compare(this, &uri);
	return order < 0;
}
inline bool URI::operator>(const URI& uri) const {
	const int order = uri_compare(this, &uri);
	return order > 0;
}
inline bool URI::operator<=(const URI& uri) const {
	const int order = uri_compare(this, &uri);
	return order <= 0;
}
inline bool URI::operator>=(const URI& uri) const {
	const int order = uri_compare(this, &uri);
	return order >= 0;
}
inline bool URI::operator==(const URI& uri) const {
	const int order = uri_compare(this, &uri);
	return order == 0;
}
inline bool URI::operator!=(const URI& uri) const {
	const int order = uri_compare(this, &uri);
	return order != 0;
}
/* Writes the string form of uri (see URI::to_string()) to os. */
static inline std::ostream& operator<<(std::ostream& os, const URI& uri) {
	const std::string text = uri.to_string();
	return os << text;
}
#else /* defined(__cplusplus) */
/**
* Parse URI into its components.
*
* @param uri The URI to parse.
* @return URI object. The caller is responsible for freeing this object. NULL is returned on parse error or out-of-memory conditions (in the latter case errno=ENOMEM).
*/
URI *uri_parse(const char *uri);
/**
* Create string representation of URI object.
*
* @param uri URI object.
* @return URI as a string. The caller is responsible for freeing this object. NULL is returned on out-of-memory conditions (errno=ENOMEM).
*/
char *uri_build(const URI *uri);
/**
* Compare two URI objects. Follows the strcmp() contract. The order in which components are compared is as follows: scheme, host, port, path, query, fragment, user, pass.
* NULL components are always smaller than their non-NULL counterparts. That is, a < b if a->scheme == NULL and b->scheme != NULL.
*
* @param a First URI object.
* @param b Second URI object.
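* @return A negative value if a < b, 0 if a == b, a positive value if a > b. Only the sign is meaningful: string components are compared with strcmp(), whose result is returned as is.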
*/
int uri_compare(const URI *a, const URI *b);
#endif /* __cplusplus */
#endif /* uriparser2.h */