diff --git a/src/site/apt/index.apt b/src/site/apt/index.apt
index 3d6d147f9..139c4775f 100644
--- a/src/site/apt/index.apt
+++ b/src/site/apt/index.apt
@@ -62,7 +62,7 @@ Features
* Tunneled HTTPS connections through HTTP proxies, via the CONNECT method.
- * Basic, Digest authentication schemes. Please note NTLM is currently not supported.
+ * Basic, Digest authentication schemes. Please note NTLM is supported only partially.
* Plug-in mechanism for custom authentication schemes.
diff --git a/src/site/apt/ntlm.apt b/src/site/apt/ntlm.apt
new file mode 100644
index 000000000..daaf61d94
--- /dev/null
+++ b/src/site/apt/ntlm.apt
@@ -0,0 +1,183 @@
+~~ $HeadURL$
+~~ $Revision$
+~~ $Date$
+~~
+~~ ====================================================================
+~~ Licensed to the Apache Software Foundation (ASF) under one
+~~ or more contributor license agreements. See the NOTICE file
+~~ distributed with this work for additional information
+~~ regarding copyright ownership. The ASF licenses this file
+~~ to you under the Apache License, Version 2.0 (the
+~~ "License"); you may not use this file except in compliance
+~~ with the License. You may obtain a copy of the License at
+~~
+~~ http://www.apache.org/licenses/LICENSE-2.0
+~~
+~~ Unless required by applicable law or agreed to in writing,
+~~ software distributed under the License is distributed on an
+~~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+~~ KIND, either express or implied. See the License for the
+~~ specific language governing permissions and limitations
+~~ under the License.
+~~ ====================================================================
+~~
+~~ This software consists of voluntary contributions made by many
+~~ individuals on behalf of the Apache Software Foundation. For more
+~~ information on the Apache Software Foundation, please see
+~~ <http://www.apache.org/>.
+
+ ----------
+ NTLM support in HttpClient
+ ----------
+ ----------
+ ----------
+
+NTLM support in HttpClient
+
+ Currently HttpClient 4.0 does not provide support for the NTLM authentication scheme
+ out of the box and probably never will. The reasons for that are legal rather than
+ technical.
+
+* Background
+
+ NTLM is a proprietary authentication scheme developed by Microsoft and optimized for
+ the Windows operating system.
+
+ Until 2008 there was no official, publicly available, complete documentation of
+ the protocol. {{{http://davenport.sourceforge.net/ntlm.html}Unofficial}} 3rd party
+ protocol descriptions existed as a result of reverse-engineering efforts. It was not
+ really known whether the descriptions based on reverse-engineering were complete or
+ even correct.
+
+ Microsoft published {{{http://download.microsoft.com/download/a/e/6/ae6e4142-aa58-45c6-8dcf-a657e5900cd3/%5BMS-NLMP%5D.pdf}MS-NLMP}}
+ and {{{http://download.microsoft.com/download/a/e/6/ae6e4142-aa58-45c6-8dcf-a657e5900cd3/%5BMS-NTHT%5D.pdf}MS-NTHT}}
+ specifications in February 2008 as a part of its
+ {{{http://www.microsoft.com/interop/principles/default.mspx}Interoperability
+ Principles initiative}}. Unfortunately, it is still not entirely clear whether NTLM
+ encryption algorithms are covered by any patents held by Microsoft, which would make
+ commercial users of open-source NTLM implementations liable for the use of Microsoft
+ intellectual property.
+
+* Enabling NTLM support in HttpClient 4.x
+
+ The good news is that HttpClient becomes fully NTLM capable once it is given
+ an NTLM engine. HttpClient ships with the NTLM authentication scheme, which,
+ if configured to use an external NTLM engine, can handle NTLM challenges and
+ authenticate against NTLM servers. The engine must implement this interface:
+
+----------------------------------------
+public interface NTLMEngine {
+
+ String generateType1Msg(
+ String domain,
+ String workstation) throws NTLMEngineException;
+
+ String generateType3Msg(
+ String username,
+ String password,
+ String domain,
+ String workstation,
+ String challenge) throws NTLMEngineException;
+
+}
+----------------------------------------
+
+* Using Samba JCIFS as an NTLM engine
+
+ Follow these instructions to build an NTLMEngine implementation using the JCIFS library
+
+ <>.
+
+ * Download the latest release of the JCIFS library from the
+ {{{http://jcifs.samba.org/}Samba}} web site
+
+ * Implement NTLMEngine interface
+
+----------------------------------------
+import jcifs.ntlmssp.Type1Message;
+import jcifs.ntlmssp.Type2Message;
+import jcifs.ntlmssp.Type3Message;
+import jcifs.util.Base64;
+
+import org.apache.http.impl.auth.NTLMEngine;
+import org.apache.http.impl.auth.NTLMEngineException;
+
+public class JCIFSEngine implements NTLMEngine {
+
+ public String generateType1Msg(
+ String domain,
+ String workstation) throws NTLMEngineException {
+
+ Type1Message t1m = new Type1Message(
+ Type1Message.getDefaultFlags(),
+ domain,
+ workstation);
+ return Base64.encode(t1m.toByteArray());
+ }
+
+ public String generateType3Msg(
+ String username,
+ String password,
+ String domain,
+ String workstation,
+ String challenge) throws NTLMEngineException {
+ Type2Message t2m;
+ try {
+ t2m = new Type2Message(Base64.decode(challenge));
+ } catch (IOException ex) {
+ throw new NTLMEngineException("Invalid Type2 message", ex);
+ }
+ Type3Message t3m = new Type3Message(
+ t2m,
+ password,
+ domain,
+ username,
+ workstation);
+ return Base64.encode(t3m.toByteArray());
+ }
+
+}
+----------------------------------------
+
+ * Implement AuthSchemeFactory interface
+
+----------------------------------------
+import org.apache.http.auth.AuthScheme;
+import org.apache.http.auth.AuthSchemeFactory;
+import org.apache.http.impl.auth.NTLMScheme;
+import org.apache.http.params.HttpParams;
+
+public class NTLMSchemeFactory implements AuthSchemeFactory {
+
+ public AuthScheme newInstance(final HttpParams params) {
+ return new NTLMScheme(new JCIFSEngine());
+ }
+
+}
+----------------------------------------
+
+ * Register NTLMSchemeFactory with the HttpClient instance you want to NTLM
+ enable.
+
+----------------------------------------
+httpclient.getAuthSchemes().register("ntlm", new NTLMSchemeFactory());
+----------------------------------------
+
+ * Set NTCredentials for the web server you are going to access.
+
+----------------------------------------
+httpclient.getCredentialsProvider().setCredentials(
+ new AuthScope("myserver", -1),
+ new NTCredentials("username", "password", "MYSERVER", "MYDOMAIN"));
+-----------------------------------------------------------
+
+ * You are done.
+
+
+* Why this code is not distributed with HttpClient
+
+ JCIFS is licensed under the Lesser General Public License (LGPL). This license
+ is not compatible with the Apache Licenses under which all Apache Software is
+ released. Lawyers of the Apache Software Foundation are currently investigating
+ under which conditions Apache software is allowed to make use of LGPL software.
diff --git a/src/site/apt/primer.apt b/src/site/apt/primer.apt
new file mode 100644
index 000000000..73503d5da
--- /dev/null
+++ b/src/site/apt/primer.apt
@@ -0,0 +1,670 @@
+~~ $HeadURL$
+~~ $Revision$
+~~ $Date$
+~~
+~~ ====================================================================
+~~ Licensed to the Apache Software Foundation (ASF) under one
+~~ or more contributor license agreements. See the NOTICE file
+~~ distributed with this work for additional information
+~~ regarding copyright ownership. The ASF licenses this file
+~~ to you under the Apache License, Version 2.0 (the
+~~ "License"); you may not use this file except in compliance
+~~ with the License. You may obtain a copy of the License at
+~~
+~~ http://www.apache.org/licenses/LICENSE-2.0
+~~
+~~ Unless required by applicable law or agreed to in writing,
+~~ software distributed under the License is distributed on an
+~~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+~~ KIND, either express or implied. See the License for the
+~~ specific language governing permissions and limitations
+~~ under the License.
+~~ ====================================================================
+~~
+~~ This software consists of voluntary contributions made by many
+~~ individuals on behalf of the Apache Software Foundation. For more
+~~ information on the Apache Software Foundation, please see
+~~ <http://www.apache.org/>.
+
+ ----------
+ Client HTTP Programming Primer
+ ----------
+ ----------
+ ----------
+
+Client HTTP Programming Primer
+
+* About
+
+ This document is intended for people who suddenly have to or want to implement
+ an application that automates something usually done with a browser,
+ but are missing the background to understand what they actually need to do.
+ It provides guidance on the steps required to implement a program that
+ interacts with a web site which is designed to be used with a browser.
+ It does not save you from eventually learning the background of what
+ you are doing, but it should help you to get started quickly and learn
+ the details later.
+
+ This document has evolved from discussions on the HttpClient mailing lists.
+ Although it refers to HttpClient, the concepts described here apply equally
+ to HttpComponents or SUN's {{{http://java.sun.com/j2se/1.4.2/docs/api/java/net/HttpURLConnection.html}HttpURLConnection}}
+ or any other HTTP communication library for any programming language. So you
+ might find it useful even if you're not using Java and HttpClient.
+
+ The existence of this document does not imply that the HttpClient community
+ feels responsible for teaching you how to program a client HTTP application.
+ It is merely a way for us to reduce the noise on the mailing list without
+ just leaving the newbies out in the cold.
+
+* Scenario
+
+ Let's assume that you have some kind of repetitive, web-based task that
+ you want to automate. Something like:
+
+ * go to page http://xxx.yyy.zzz/login.html
+
+ * enter username and password in a web form and hit the "login" button
+
+ * navigate to a specific page
+
+ * check the number/headline/whatever shown on that page
+
+ []
+
+ At this time, we don't have a specific example which could be developed
+ into a sample application. So this document is all bla-bla, and you will
+ have to work out the details - all the details - yourself. Such is life.
+
+* Caveat
+
+ This scenario describes a hobbyist usage of HTTP, in other words:
+ <>. Web sites are designed for user interaction, not
+ as an application programming interface (API). The interface of a
+ web site is the user interface displayed by a browser. The HTTP
+ communication between the browser and the server is an internal API,
+ subject to change without notice.
+
+ A web site can be redesigned at any point in time. The server then
+ sends different documents and a browser will display the new content.
+ The user easily adjusts to click the appropriate links, and the browser
+ communicates via HTTP as specified by the new documents from the server.
+ Your application that only mimics a browser will simply break.
+
+ Nevertheless, implementing this scenario will help you to get
+ familiar with HTTP communication. It is also "good enough" for
+ hobbyist applications, for example if you want to download the
+ latest installment of your favorite daily webcomic to install
+ it as the screen background. There is no big damage if such an
+ application breaks.
+
+ If you want to implement a solid application, you should use only
+ published APIs. For example, to check for new mail on your webmail
+ account, you should ask the webmail provider for POP or IMAP access.
+ These are standardized protocols supported by most email client applications.
+ If you want to have a newsticker, look for RSS feeds from the provider and
+ applications that display them.
+
+ As another example, if you want to perform a web search, there are
+ search companies that provide an API for using their search engines.
+ Unlike the examples before, such APIs are proprietary. You will still
+ have to implement an application, but then you are using a published API
+ that the provider will not change without notice.
+
+
+* Not a Browser
+
+ HttpClient is not a browser. Here's the difference.
+
+ <>
+
+[images/browser.png] Browser
+
+ The figure shows some of the components you will find in a browser.
+ To the left, there is the user interface. The browser needs a rendering
+ engine to display pages, and to interpret user input such as mouse clicks
+ somewhere on the displayed page. There is a layout engine which computes
+ how an HTML page should be displayed, including cascading style sheets
+ and images. A JavaScript interpreter runs JavaScript code embedded in
+ or referenced from HTML pages. Events from the user interface are passed
+ to the JavaScript interpreter for processing.
+ On the top, there are interfaces for plugins that can handle Applets,
+ embedded media objects like PDF files, Quicktime movies and Flash animations,
+ or ActiveX controls that can do anything.
+
+ In the center of the figure you can find internal components. Browsers
+ have a cache of recently accessed documents and image files. They need
+ to remember cookies and passwords entered by the user. Such information
+ can be kept in memory or stored persistently in the file system at the
+ bottom of the figure, to be available again when the browser is restarted.
+ Certificates for secure communication are almost always stored persistently.
+ To the right of the figure is the network. Browsers support many protocols
+ on different levels of abstraction. There are application protocols
+ such as FTP and HTTP to retrieve documents from servers, and transport
+ layer protocols such as TLS/SSL and Socks to establish connections for
+ the application protocols.
+
+ One characteristic of browsers that is not shown in the figure is tolerance
+ for bad input. There needs to be tolerance for invalid user input to make
+ the browser user friendly. There also needs to be tolerance for malformed
+ documents retrieved from servers, and for flaws in server behavior when
+ executing protocols, to make as many websites as possible accessible to
+ the user.
+
+ <>
+
+[images/httpclient.png] HTTP Client
+
+ The figure shows some of the components you will find in a browser,
+ and highlights the scope of HttpClient. The primary responsibility
+ of HttpClient is the HTTP protocol, executed directly or through an
+ HTTP proxy. It provides interfaces and default implementations for
+ cookie and password management, but not for persisting such data.
+ User interfacing, HTML parsing, plugins or non-HTTP application level
+ protocols are not in the scope of HttpClient. It does provide interfaces
+ to plug in transport layer protocols, but it does not implement such
+ protocols.
+
+ All the rest of a browser's functionality you require needs to be
+ provided by your application. HttpClient executes HTTP requests, but it
+ will not and can not assemble them. Since HttpClient does not interface
+ with the user, nor interpret content such as HTML files, there is
+ little or no tolerance for bad data passed to the API. There is some
+ tolerance for flaws in server behavior, but there are limits to the
+ deviations HttpClient can handle.
+
+* Terminology
+
+ This section introduces some important terms you have to know to
+ understand the rest of this document.
+
+ <<<HTTP Message>>>
+
+ consists of a header section and an optional entity. There are two kinds
+ of messages, requests and responses. They differ in the format of the
+ first line, but both can have header fields and an optional entity.
+
+ <<<HTTP Request>>>
+
+ is sent from a client to a server. The first line includes the URI for
+ which the request is sent, and a method that the server should execute
+ for the client.
+
+ <<<HTTP Response>>>
+
+ is sent from a server to a client in response to a request. The first
+ line includes a status code that tells about success or failure of
+ the request. HTTP defines a set of status codes, like 200 for success
+ and 404 for not found. Other protocols based on HTTP can define
+ additional status codes.
+
+ <<<Method>>>
+
+ is an operation requested from the server. HTTP defines a set of
+ operations, the most frequent being GET and POST. Other protocols
+ based on HTTP can define additional methods.
+
+ <<<Header Fields>>>
+
+ are name-value pairs, where both name and value are text. The name of
+ a header field is not case sensitive. Multiple values can be assigned
+ to the same name. RFC 2616 defines a wide range
+ of header fields for handling various aspects of the HTTP protocol.
+ Other specifications, like RFC 2617 and RFC 2965, define additional
+ headers. Some of the defined headers are for general use, others are
+ meant for exclusive use with either requests or responses, still others
+ are meant for use only with an entity.
+
+ <<<Entity>>>
+
+ is data sent with an HTTP message. For example, a response can contain
+ the page or image you are downloading as an entity, or a request can
+ include the parameters that you entered into a web form.
+ The entity of an HTTP message can have an arbitrary data format, which
+ is usually specified as a MIME type in a header field.
+
+ <<<Session>>>
+
+ is a series of requests from a single source to a server. The server
+ can keep session data, and needs to recognize the session to which
+ each incoming request belongs. For example, if you execute a web search,
+ the server will only return one page of search results. But it keeps
+ track of the other results and makes them available when you click on
+ the link to the "next" page. The server needs to know from the request
+ that it is you and your session for which more results are requested,
+ and not me and my session. That's because I searched for something else.
+
+ <<<Cookies>>>
+
+ are the preferred way for servers to track sessions. The server supplies
+ a piece of data, called a cookie, in response to a request. The server
+ expects the client to send that piece of data in a header field with each
+ following request of the same session.
+ The cookie is different for each session, so the server can identify to
+ which session a request belongs by looking at the cookie. If the cookie
+ is missing from a request, the server will not respond as expected.
+
+* Step by Step
+
+** GET the Login Page
+
+ Create and execute a GET request for the login page.
+ Just use the link you would type into the browser as the URL.
+ This is what a browser does when you enter a URL in the address bar
+ or when you click on a link that points to another web page.
+
+ Inspect the response from the server:
+
+ * do you get the page you expected?
+
+ []
+
+ It should be sent as the entity of the response to your request.
+ The entity is also referred to as the response body.
+
+ * do you get a session cookie?
+
+ []
+
+ Cookies are sent in a header field named Set-Cookie or Set-Cookie2.
+ It is possible that you don't get a session cookie until you log in.
+ If there is no session cookie in the response, you'll have to perform
+ step 2 later, after you reach the point where the cookie is set.
+
+ If you do not get the page you expect, check the URL you are requesting.
+ If it is correct, the server may use browser detection. You will have
+ to set the header field User-Agent to a value used by a popular browser
+ to pretend that the request is coming from that browser.
+
+ If you can't get the login page, get the home page instead now.
+ Get the login page in the next step, when you establish the session.
+
+** Establish the Session
+
+ Create and execute another GET request for a page.
+ You can simply request the login page again, or some other page
+ of which you know the URL. Do NOT try to get a page which would
+ be returned in response to submitting a web form. Use something
+ you can reach simply by clicking on a link in the browser. Something
+ where you can see the URL in the browser status line while the
+ mouse pointer is hovering over the link.
+
+ This step is important when developing the application. Once you know
+ that your application does establish the session correctly, you may
+ be able to remove it. Only if you couldn't get the login page directly
+ and had to get the home page first, you know you have to leave it in.
+
+ Inspect the request being sent to the server.
+
+ * is the session cookie sent with the request?
+
+ []
+
+ You can see what is sent to the server by enabling the wire log
+ for HttpClient. You only need to see the request headers, not the body.
+ The session cookie should be sent in a header field called Cookie.
+ There may be several of those, and other cookies might be sent as well.
+
+ Inspect the response from the server:
+
+ * do you get another session cookie?
+
+ []
+
+ You should not get another session cookie. If you get the same session
+ cookie as before, the server behaves a little strangely, but that should
+ not be a problem. If you get a new session cookie, then the server did
+ not recognize the session for the request. Usually, this happens if the
+ request did not contain the session cookie. But servers might use other
+ means to track sessions, or to detect session hijacking.
+
+ If the session cookie is not sent in the request, one of two things
+ has gone wrong. Either the cookie was not detected in the previous
+ response, or the cookie was not selected for being sent with the new
+ request.
+
+ HttpClient automatically parses cookies sent in responses and puts them
+ into a cookie store. HttpClient uses a configurable cookie policy
+ to decide whether a cookie being sent from a server is correct.
+ The default policy complies strictly with RFC 2109, but many servers
+ do not. Play around with the cookie policies until the cookie is
+ accepted and put into the cookie store.
+
+ If the cookie is accepted from the previous response but still not
+ sent with the new request, make sure that HttpClient uses the same
+ cookie store object. Unless you explicitly manage cookie store
+ objects (not recommended for newbies!), this will be the case if you
+ use the same HttpClient object to execute both requests.
+
+ If the cookie is still not sent with the request, make sure that the
+ URL you are requesting is in the scope for the cookie. Cookies are
+ only sent to the domain and path specified in the cookie scope.
+ A cookie for host "jakarta.apache.org" will not be sent to host
+ "tomcat.apache.org". A cookie for domain ".apache.org" will be sent
+ to both. A cookie for host "apache.org", without the leading dot,
+ will not be sent to "jakarta.apache.org". The latter case can be
+ resolved by using a different cookie spec that adds the leading dot.
+ In the other cases, use a URL that is in the cookie scope to establish
+ the session.
+
+ If the session cookie is sent with the request, but a new session cookie
+ is set in the response anyway, check whether there are cookies other
+ than the session cookie in the request. Some servers are incapable of
+ detecting multiple cookies sent in individual header fields. HttpClient
+ can be advised to put all cookies into a single header field.
+
+ If that doesn't help, you are in trouble. The server may use additional
+ means to track the session, for example the header field named Referer.
+ Set that field to the URL of the previous request.
+ ({{{http://mail-archives.apache.org/mod_mbox/jakarta-httpclient-user/200602.mbox/%3c19b.44e04b45.31166eaa@aol.com%3e}see this mail}})
+
+ If that doesn't help either, you will have to compare the request from
+ your application to a corresponding one generated by a browser. The
+ instructions in step 5 for POST requests apply for GET requests as well.
+ It's even simpler with GET, since you don't have an entity.
+
+** Analyze the Form
+
+ Now it is time to analyze the form defined in the HTML markup of the page.
+ A form in HTML is a set of name-value-pairs called parameters, where some
+ of the values can be entered in the browser. By analyzing the HTML markup,
+ you can learn which parameters you have to define and how to send them
+ to the server.
+
+ Look for the <<<\<form\>>>> and <<<\</form\>>>> tags. Everything in between the two may be
+ relevant. Let's start with the attributes of the