1 /**
2  *
3  */
4 module httparsed;
5 
6 nothrow @safe @nogc:
7 
8 /// Parser error codes
9 enum ParserError : int
10 {
11     partial = 1,    /// not enough data to parse message
12     newLine,        /// invalid character in new line
13     headerName,     /// invalid character in header name
14     headerValue,    /// invalid header value
15     status,         /// invalid character in response status
16     token,          /// invalid character in token
17     noHeaderName,   /// empty header name
18     noMethod,       /// no method in request line
19     noVersion,      /// no version in request line / response status line
20     noUri,          /// no URI in request line
21     noStatus,       /// no status code or text in status line
22     invalidMethod,  /// invalid method in request line
23     invalidVersion, /// invalid version for the protocol message
24 }
25 
26 /// Helper function to initialize message parser
27 auto initParser(MSG, Args...)(Args args) { return MsgParser!MSG(args); }
28 
29 /**
30  *  HTTP/RTSP message parser.
31  */
32 struct MsgParser(MSG)
33 {
34     import std.traits : ForeachType, isArray, Unqual;
35 
36     this(Args...)(Args args)
37     {
38         this.msg = MSG(args);
39     }
40 
41     /**
42      *  Parses message request (request line + headers).
43      *
44      *  Params:
45      *    - buffer = buffer to parse message from
46      *    - lastPos = optional argument to store / pass previous position to which message was
47      *                already parsed (speeds up parsing when message comes in parts)
48      *
49      *  Returns:
50      *    * parsed message header length when parsed sucessfully
51      *    * `-ParserError` on error (ie. -1 when message header is not complete yet)
52      */
53     int parseRequest(T)(T buffer, ref uint lastPos)
54         if (isArray!T && (is(Unqual!(ForeachType!T) == char) || is(Unqual!(ForeachType!T) == ubyte)))
55     {
56         static if (is(Unqual!(ForeachType!T) == char)) return parse!parseRequestLine(cast(const(ubyte)[])buffer, lastPos);
57         else return parse!parseRequestLine(buffer, lastPos);
58     }
59 
60     /// ditto
61     int parseRequest(T)(T buffer)
62         if (isArray!T && (is(Unqual!(ForeachType!T) == char) || is(Unqual!(ForeachType!T) == ubyte)))
63     {
64         uint lastPos;
65         static if (is(Unqual!(ForeachType!T) == char)) return parse!parseRequestLine(cast(const(ubyte)[])buffer, lastPos);
66         else return parse!parseRequestLine(buffer, lastPos);
67     }
68 
69     /**
70      *  Parses message response (status line + headers).
71      *
72      *  Params:
73      *    - buffer = buffer to parse message from
74      *    - lastPos = optional argument to store / pass previous position to which message was
75      *                already parsed (speeds up parsing when message comes in parts)
76      *
77      *  Returns:
78      *    * parsed message header length when parsed sucessfully
79      *    * `-ParserError.partial` on error (ie. -1 when message header is not comlete yet)
80      */
81     int parseResponse(T)(T buffer, ref uint lastPos)
82         if (isArray!T && (is(Unqual!(ForeachType!T) == char) || is(Unqual!(ForeachType!T) == ubyte)))
83     {
84         static if (is(Unqual!(ForeachType!T) == char)) return parse!parseStatusLine(cast(const(ubyte)[])buffer, lastPos);
85         else return parse!parseStatusLine(buffer, lastPos);
86     }
87 
88     /// ditto
89     int parseResponse(T)(T buffer)
90         if (isArray!T && (is(Unqual!(ForeachType!T) == char) || is(Unqual!(ForeachType!T) == ubyte)))
91     {
92         uint lastPos;
93         static if (is(Unqual!(ForeachType!T) == char)) return parse!parseStatusLine(cast(const(ubyte)[])buffer, lastPos);
94         else return parse!parseStatusLine(buffer, lastPos);
95     }
96 
97     /// Gets provided structure used during parsing
98     ref MSG msg() return { return m_msg; }
99 
100     alias msg this;
101 
102 private:
103 
104     // character map of valid characters for token, forbidden:
105     //   0-SP, DEL, HT
106     //   ()<>@,;:\"/[]?={}
107     enum tokenRanges = "\0 \"\"(),,//:@[]{}\x7f\xff";
108     enum tokenSSERanges = "\0 \"\"(),,//:@[]{\xff"; // merge of last range due to the SSE register size limit
109 
110     enum versionRanges = "\0-:@[`{\xff"; // allow only [A-Za-z./] characters
111 
112     MSG m_msg;
113 
114     int parse(alias pred)(const(ubyte)[] buffer, ref uint lastPos)
115     {
116         assert(buffer.length >= lastPos);
117         immutable l = buffer.length;
118 
119         if (_expect(!lastPos, true))
120         {
121             if (_expect(!buffer.length, false)) return err(ParserError.partial);
122 
123             // skip first empty line (some clients add CRLF after POST content)
124             if (_expect(buffer[0] == '\r', false))
125             {
126                 if (_expect(buffer.length == 1, false)) return err(ParserError.partial);
127                 if (_expect(buffer[1] != '\n', false)) return err(ParserError.newLine);
128                 lastPos += 2;
129                 buffer = buffer[lastPos..$];
130             }
131             else if (_expect(buffer[0] == '\n', false))
132                 buffer = buffer[++lastPos..$];
133 
134             immutable res = pred(buffer);
135             if (_expect(res < 0, false)) return res;
136 
137             lastPos = cast(int)(l - buffer.length); // store index of last parsed line
138         }
139         else buffer = buffer[lastPos..$]; // skip already parsed lines
140 
141         immutable hdrRes = parseHeaders(buffer);
142         lastPos = cast(int)(l - buffer.length); // store index of last parsed line
143 
144         if (_expect(hdrRes < 0, false)) return hdrRes;
145         return lastPos; // finished
146     }
147 
148     int parseHeaders(ref const(ubyte)[] buffer)
149     {
150         bool hasHeader;
151         size_t start, i;
152         const(ubyte)[] name, value;
153         while (true)
154         {
155             // check for msg headers end
156             if (_expect(buffer.length == 0, false)) return err(ParserError.partial);
157             if (buffer[0] == '\r')
158             {
159                 if (_expect(buffer.length == 1, false)) return err(ParserError.partial);
160                 if (_expect(buffer[1] != '\n', false)) return err(ParserError.newLine);
161 
162                 buffer = buffer[2..$];
163                 return 0;
164             }
165             if (_expect(buffer[0] == '\n', false))
166             {
167                 buffer = buffer[1..$];
168                 return 0;
169             }
170 
171             if (!hasHeader || (buffer[i] != ' ' && buffer[i] != '\t'))
172             {
173                 auto ret = parseToken!(tokenRanges, ':', tokenSSERanges)(buffer, i);
174                 if (_expect(ret < 0, false)) return ret;
175                 if (_expect(start == i, false)) return err(ParserError.noHeaderName);
176                 name = buffer[start..i]; // store header name
177                 i++; // move index after colon
178 
179                 // skip over SP and HT
180                 for (;; ++i)
181                 {
182                     if (_expect(i == buffer.length, false)) return err(ParserError.partial);
183                     if (buffer[i] != ' ' && buffer[i] != '\t') break;
184                 }
185                 start = i;
186             }
187             else name = null; // multiline header
188 
189             // parse value
190             auto ret = parseToken!("\0\010\012\037\177\177", "\r\n")(buffer, i);
191             if (_expect(ret < 0, false)) return ret;
192             value = buffer[start..i];
193             mixin(advanceNewline);
194             hasHeader = true; // flag to define that we can now accept multiline header values
195             static if (__traits(hasMember, m_msg, "onHeader"))
196             {
197                 // remove trailing SPs and HTABs
198                 if (_expect(value.length && (value[$-1] == ' ' || value[$-1] == '\t'), false))
199                 {
200                     int j = cast(int)value.length - 2;
201                     for (; j >= 0; --j)
202                         if (!(value[j] == ' ' || value[j] == '\t'))
203                             break;
204                     value = value[0..j+1];
205                 }
206 
207                 static if (is(typeof(m_msg.onHeader("", "")) == void))
208                     m_msg.onHeader(cast(const(char)[])name, cast(const(char)[])value);
209                 else {
210                     auto r = m_msg.onHeader(cast(const(char)[])name, cast(const(char)[])value);
211                     if (_expect(r < 0, false)) return r;
212                 }
213             }
214 
215             // header line completed -> advance buffer
216             buffer = buffer[i..$];
217             start = i = 0;
218         }
219         assert(0);
220     }
221 
222     auto parseRequestLine(ref const(ubyte)[] buffer)
223     {
224         size_t start, i;
225 
226         // METHOD
227         auto ret = parseToken!(tokenRanges, ' ', tokenSSERanges)(buffer, i);
228         if (_expect(ret < 0, false)) return ret;
229         if (_expect(start == i, false)) return err(ParserError.noMethod);
230 
231         static if (__traits(hasMember, m_msg, "onMethod"))
232         {
233             static if (is(typeof(m_msg.onMethod("")) == void))
234                 m_msg.onMethod(cast(const(char)[])buffer[start..i]);
235             else {
236                 auto r = m_msg.onMethod(cast(const(char)[])buffer[start..i]);
237                 if (_expect(r < 0, false)) return r;
238             }
239         }
240         mixin(skipSpaces!(ParserError.noUri));
241         start = i;
242 
243         // PATH
244         ret = parseToken!("\000\040\177\177", ' ')(buffer, i);
245         if (_expect(ret < 0, false)) return ret;
246         static if (__traits(hasMember, m_msg, "onUri"))
247         {
248             static if (is(typeof(m_msg.onUri("")) == void))
249                 m_msg.onUri(cast(const(char)[])buffer[start..i]);
250             else {
251                 auto ur = m_msg.onUri(cast(const(char)[])buffer[start..i]);
252                 if (_expect(ur < 0, false)) return ur;
253             }
254         }
255         mixin(skipSpaces!(ParserError.noVersion));
256         start = i;
257 
258         // VERSION
259         ret = parseToken!(versionRanges, "\r\n")(buffer, i);
260         if (_expect(ret < 0, false)) return ret;
261         static if (__traits(hasMember, m_msg, "onVersion"))
262         {
263             static if (is(typeof(m_msg.onVersion("")) == void))
264                 m_msg.onVersion(cast(const(char)[])buffer[start..i]);
265             else {
266                 auto vr = m_msg.onVersion(cast(const(char)[])buffer[start..i]);
267                 if (_expect(vr < 0, false)) return vr;
268             }
269         }
270         mixin(advanceNewline);
271 
272         // advance buffer after the request line
273         buffer = buffer[i..$];
274         return 0;
275     }
276 
277     auto parseStatusLine(ref const(ubyte)[] buffer)
278     {
279         size_t start, i;
280 
281         // VERSION
282         auto ret = parseToken!(versionRanges, ' ')(buffer, i);
283         if (_expect(ret < 0, false)) return ret;
284         if (_expect(start == i, false)) return err(ParserError.noVersion);
285         static if (__traits(hasMember, m_msg, "onVersion"))
286         {
287             static if (is(typeof(m_msg.onVersion("")) == void))
288                 m_msg.onVersion(cast(const(char)[])buffer[start..i]);
289             else {
290                 auto r = m_msg.onVersion(cast(const(char)[])buffer[start..i]);
291                 if (_expect(r < 0, false)) return r;
292             }
293         }
294         mixin(skipSpaces!(ParserError.noStatus));
295         start = i;
296 
297         // STATUS CODE
298         if (_expect(i+3 >= buffer.length, false))
299             return err(ParserError.partial); // not enough data - we want at least [:digit:][:digit:][:digit:]<other char> to try to parse
300 
301         int code;
302         foreach (j, m; [100, 10, 1])
303         {
304             if (buffer[i+j] < '0' || buffer[i+j] > '9') return err(ParserError.status);
305             code += (buffer[start+j] - '0') * m;
306         }
307         i += 3;
308         static if (__traits(hasMember, m_msg, "onStatus"))
309         {
310             static if (is(typeof(m_msg.onStatus(code)) == void))
311                 m_msg.onStatus(code);
312             else {
313                 auto sr = m_msg.onStatus(code);
314                 if (_expect(sr < 0, false)) return sr;
315             }
316         }
317         if (_expect(i == buffer.length, false))
318             return err(ParserError.partial);
319         if (_expect(buffer[i] != ' ' && buffer[i] != '\r' && buffer[i] != '\n', false))
320             return err(ParserError.status); // Garbage after status
321 
322         start = i;
323 
324         // MESSAGE
325         ret = parseToken!("\0\010\012\037\177\177", "\r\n")(buffer, i);
326         if (_expect(ret < 0, false)) return ret;
327         static if (__traits(hasMember, m_msg, "onStatusMsg"))
328         {
329             // remove preceding space (we did't advance over spaces because possibly missing status message)
330             if (i > start)
331             {
332                 while (buffer[start] == ' ' && start < i) start++;
333                 if (i > start)
334                 {
335                     static if (is(typeof(m_msg.onStatusMsg("")) == void))
336                         m_msg.onStatusMsg(cast(const(char)[])buffer[start..i]);
337                     else {
338                         auto smr = m_msg.onStatusMsg(cast(const(char)[])buffer[start..i]);
339                         if (_expect(smr < 0, false)) return smr;
340                     }
341                 }
342             }
343         }
344         mixin(advanceNewline);
345 
346         // advance buffer after the status line
347         buffer = buffer[i..$];
348         return 0;
349     }
350 
351     /*
352      * Advances buffer over the token to the next character while checking for valid characters.
353      * On success, buffer index is left on the next character.
354      *
355      * Params:
356      *      - ranges = ranges of characters to stop on
357      *      - sseRanges = if null, same ranges is used, but they are limited to 8 ranges
358      *      - next  = next character/s to stop on (must be present in the provided ranges too)
359      * Returns: 0 on success error code otherwise
360      */
361     int parseToken(string ranges, alias next, string sseRanges = null)(const(ubyte)[] buffer, ref size_t i) pure
362     {
363         version (DigitalMars) {
364             static if (__VERSION__ >= 2094) pragma(inline, true); // older compilers can't inline this
365         } else pragma(inline, true);
366 
367         immutable charMap = parseTokenCharMap!(ranges)();
368 
369         static if (LDC_with_SSE42)
370         {
371             // CT function to prepare input for SIMD vector enum
372             static byte[16] padRanges()(string ranges)
373             {
374                 byte[16] res;
375                 // res[0..ranges.length] = cast(byte[])ranges[]; - broken on macOS betterC tests
376                 foreach (i, c; ranges) res[i] = cast(byte)c;
377                 return res;
378             }
379 
380             static if (sseRanges) alias usedRng = sseRanges;
381             else alias usedRng = ranges;
382             static assert(usedRng.length <= 16, "Ranges must be at most 16 characters long");
383             static assert(usedRng.length % 2 == 0, "Ranges must have even number of characters");
384             enum rangesSize = usedRng.length;
385             enum byte16 rngE = padRanges(usedRng);
386 
387             if (_expect(buffer.length - i >= 16, true))
388             {
389                 size_t left = (buffer.length - i) & ~15; // round down to multiple of 16
390                 byte16 ranges16 = rngE;
391 
392                 do
393                 {
394                     byte16 b16 = () @trusted { return cast(byte16)_mm_loadu_si128(cast(__m128i*)&buffer[i]); }();
395                     immutable r = _mm_cmpestri(
396                         ranges16, rangesSize,
397                         b16, 16,
398                         _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS
399                     );
400 
401                     if (r != 16)
402                     {
403                         i += r;
404                         goto FOUND;
405                     }
406                     i += 16;
407                     left -= 16;
408                 }
409                 while (_expect(left != 0, true));
410             }
411         }
412         else
413         {
414             // faster unrolled loop to iterate over 8 characters
415             loop: while (_expect(buffer.length - i >= 8, true))
416             {
417                 static foreach (_; 0..8)
418                 {
419                     if (_expect(!charMap[buffer[i]], false)) goto FOUND;
420                     ++i;
421                 }
422             }
423         }
424 
425         // handle the rest
426         if (_expect(i >= buffer.length, false)) return err(ParserError.partial);
427 
428         FOUND:
429         while (true)
430         {
431             static if (is(typeof(next) == char)) {
432                 static assert(!charMap[next], "Next character is not in ranges");
433                 if (buffer[i] == next) return 0;
434             } else {
435                 static assert(next.length > 0, "Next character not provided");
436                 static foreach (c; next) {
437                     static assert(!charMap[c], "Next character is not in ranges");
438                     if (buffer[i] == c) return 0;
439                 }
440             }
441             if (_expect(!charMap[buffer[i]], false)) return err(ParserError.token);
442             if (_expect(++i == buffer.length, false)) return err(ParserError.partial);
443         }
444     }
445 
446     // advances over new line
447     enum advanceNewline = q{
448             assert(i < buffer.length);
449             if (_expect(buffer[i] == '\r', true))
450             {
451                 if (_expect(i+1 == buffer.length, false)) return err(ParserError.partial);
452                 if (_expect(buffer[i+1] != '\n', false)) return err(ParserError.newLine);
453                 i += 2;
454             }
455             else if (buffer[i] == '\n') ++i;
456             else assert(0);
457         };
458 
459     // skips over spaces in the buffer
460     template skipSpaces(ParserError err)
461     {
462         enum skipSpaces = `
463             do {
464                 ++i;
465                 if (_expect(buffer.length == i, false)) return err(ParserError.partial);
466                 if (_expect(buffer[i] == '\r' || buffer[i] == '\n', false)) return err(` ~ err.stringof ~ `);
467             } while (buffer[i] == ' ');
468         `;
469     }
470 }
471 
472 ///
473 @("example")
474 unittest
475 {
476     // init parser
477     auto reqParser = initParser!Msg(); // or `MsgParser!MSG reqParser;`
478     auto resParser = initParser!Msg(); // or `MsgParser!MSG resParser;`
479 
480     // parse request
481     string data = "GET /foo HTTP/1.1\r\nHost: 127.0.0.1:8090\r\n\r\n";
482     // returns parsed message header length when parsed sucessfully, -ParserError on error
483     int res = reqParser.parseRequest(data);
484     assert(res == data.length);
485     assert(reqParser.method == "GET");
486     assert(reqParser.uri == "/foo");
487     assert(reqParser.minorVer == 1); // HTTP/1.1
488     assert(reqParser.headers.length == 1);
489     assert(reqParser.headers[0].name == "Host");
490     assert(reqParser.headers[0].value == "127.0.0.1:8090");
491 
492     // parse response
493     data = "HTTP/1.0 200 OK\r\n";
494     uint lastPos; // store last parsed position for next run
495     res = resParser.parseResponse(data, lastPos);
496     assert(res == -ParserError.partial); // no complete message header yet
497     data = "HTTP/1.0 200 OK\r\nContent-Type: text/plain\r\nContent-Length: 3\r\n\r\nfoo";
498     res = resParser.parseResponse(data, lastPos); // starts parsing from previous position
499     assert(res == data.length - 3); // whole message header parsed, body left to be handled based on actual header values
500     assert(resParser.minorVer == 0); // HTTP/1.0
501     assert(resParser.status == 200);
502     assert(resParser.statusMsg == "OK");
503     assert(resParser.headers.length == 2);
504     assert(resParser.headers[0].name == "Content-Type");
505     assert(resParser.headers[0].value == "text/plain");
506     assert(resParser.headers[1].name == "Content-Length");
507     assert(resParser.headers[1].value == "3");
508 }
509 
510 /**
511  * Parses HTTP version from a slice returned in `onVersion` callback.
512  *
513  * Returns: minor version (0 for HTTP/1.0 or 1 for HTTP/1.1) on success or
514  *          `-ParserError.invalidVersion` on error
515  */
516 int parseHttpVersion(const(char)[] ver) pure
517 {
518     if (_expect(ver.length != 8, false)) return err(ParserError.invalidVersion);
519 
520     static foreach (i, c; "HTTP/1.")
521         if (_expect(ver[i] != c, false)) return err(ParserError.invalidVersion);
522 
523     if (_expect(ver[7] < '0' || ver[7] > '9', false)) return err(ParserError.invalidVersion);
524     return ver[7] - '0';
525 }
526 
527 @("parseHttpVersion")
528 unittest
529 {
530     assert(parseHttpVersion("FOO") < 0);
531     assert(parseHttpVersion("HTTP/1.") < 0);
532     assert(parseHttpVersion("HTTP/1.12") < 0);
533     assert(parseHttpVersion("HTTP/1.a") < 0);
534     assert(parseHttpVersion("HTTP/2.0") < 0);
535     assert(parseHttpVersion("HTTP/1.00") < 0);
536     assert(parseHttpVersion("HTTP/1.0") == 0);
537     assert(parseHttpVersion("HTTP/1.1") == 1);
538 }
539 
540 version (CI_MAIN)
541 {
542     // workaround for dub not supporting unittests with betterC
543     version (D_BetterC)
544     {
545         extern(C) void main() @trusted {
546             import core.stdc.stdio;
547             static foreach(u; __traits(getUnitTests, httparsed))
548             {
549                 static if (__traits(getAttributes, u).length)
550                     printf("unittest %s:%d | '" ~ __traits(getAttributes, u)[0] ~ "'\n", __traits(getLocation, u)[0].ptr, __traits(getLocation, u)[1]);
551                 else
552                     printf("unittest %s:%d\n", __traits(getLocation, u)[0].ptr, __traits(getLocation, u)[1]);
553                 u();
554             }
555             debug printf("All unit tests have been run successfully.\n");
556         }
557     }
558     else
559     {
560         void main()
561         {
562             version (unittest) {} // run automagically
563             else
564             {
565                 import core.stdc.stdio;
566 
567                 // just a compilation test
568                 auto reqParser = initParser!Msg();
569                 auto resParser = initParser!Msg();
570 
571                 string data = "GET /foo HTTP/1.1\r\nHost: 127.0.0.1:8090\r\n\r\n";
572                 int res = reqParser.parseRequest(data);
573                 assert(res == data.length);
574 
575                 data = "HTTP/1.0 200 OK\r\nContent-Type: text/plain\r\nContent-Length: 3\r\n\r\nfoo";
576                 res = resParser.parseResponse(data);
577                 assert(res == data.length - 3);
578                 () @trusted { printf("Test app works\n"); }();
579             }
580         }
581     }
582 }
583 
584 private:
585 
586 int err(ParserError e) pure { pragma(inline, true); return -(cast(int)e); }
587 
588 /// Builds valid char map from the provided ranges of invalid ones
589 bool[256] buildValidCharMap()(string invalidRanges)
590 {
591     assert(invalidRanges.length % 2 == 0, "Uneven ranges");
592     bool[256] res = true;
593 
594     for (int i=0; i < invalidRanges.length; i+=2)
595         for (int j=invalidRanges[i]; j <= invalidRanges[i+1]; ++j)
596             res[j] = false;
597     return res;
598 }
599 
600 @("buildValidCharMap")
601 unittest
602 {
603     string ranges = "\0 \"\"(),,//:@[]{{}}\x7f\xff";
604     assert(buildValidCharMap(ranges) ==
605         cast(bool[])[
606             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
607             0,1,0,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
608             0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,1,
609             1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,
610             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
611             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
612             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
613             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
614         ]);
615 }
616 
617 immutable(bool[256]) parseTokenCharMap(string invalidRanges)() {
618     static immutable charMap = buildValidCharMap(invalidRanges);
619     return charMap;
620 }
621 
622 version (unittest) version = WITH_MSG;
623 else version (CI_MAIN) version = WITH_MSG;
624 
625 version (WITH_MSG)
626 {
627     // define our message content handler
628     struct Header
629     {
630         const(char)[] name;
631         const(char)[] value;
632     }
633 
634     // Just store slices of parsed message header
635     struct Msg
636     {
637         @safe pure nothrow @nogc:
638         void onMethod(const(char)[] method) { this.method = method; }
639         void onUri(const(char)[] uri) { this.uri = uri; }
640         int onVersion(const(char)[] ver)
641         {
642             minorVer = parseHttpVersion(ver);
643             return minorVer >= 0 ? 0 : minorVer;
644         }
645         void onHeader(const(char)[] name, const(char)[] value) {
646             this.m_headers[m_headersLength].name = name;
647             this.m_headers[m_headersLength++].value = value;
648         }
649         void onStatus(int status) { this.status = status; }
650         void onStatusMsg(const(char)[] statusMsg) { this.statusMsg = statusMsg; }
651 
652         const(char)[] method;
653         const(char)[] uri;
654         int minorVer;
655         int status;
656         const(char)[] statusMsg;
657 
658         private {
659             Header[32] m_headers;
660             size_t m_headersLength;
661         }
662 
663         Header[] headers() return { return m_headers[0..m_headersLength]; }
664     }
665 
666     enum Test { err, complete, partial }
667 }
668 
669 // Tests from https://github.com/h2o/picohttpparser/blob/master/test.c
670 
671 @("Request")
672 unittest
673 {
674     auto parse(string data, Test test = Test.complete, int additional = 0) @safe nothrow @nogc
675     {
676         auto parser = initParser!Msg();
677         auto res = parser.parseRequest(data);
678         // if (res < 0) writeln("Err: ", cast(ParserError)(-res));
679         final switch (test)
680         {
681             case Test.err: assert(res < -ParserError.partial); break;
682             case Test.partial: assert(res == -ParserError.partial); break;
683             case Test.complete: assert(res == data.length - additional); break;
684         }
685 
686         return parser.msg;
687     }
688 
689     // simple
690     {
691         auto req = parse("GET / HTTP/1.0\r\n\r\n");
692         assert(req.headers.length == 0);
693         assert(req.method == "GET");
694         assert(req.uri == "/");
695         assert(req.minorVer == 0);
696     }
697 
698     // parse headers
699     {
700         auto req = parse("GET /hoge HTTP/1.1\r\nHost: example.com\r\nCookie: \r\n\r\n");
701         assert(req.method == "GET");
702         assert(req.uri == "/hoge");
703         assert(req.minorVer == 1);
704         assert(req.headers.length == 2);
705         assert(req.headers[0] == Header("Host", "example.com"));
706         assert(req.headers[1] == Header("Cookie", ""));
707     }
708 
709     // multibyte included
710     {
711         auto req = parse("GET /hoge HTTP/1.1\r\nHost: example.com\r\nUser-Agent: \343\201\262\343/1.0\r\n\r\n");
712         assert(req.method == "GET");
713         assert(req.uri == "/hoge");
714         assert(req.minorVer == 1);
715         assert(req.headers.length == 2);
716         assert(req.headers[0] == Header("Host", "example.com"));
717         assert(req.headers[1] == Header("User-Agent", "\343\201\262\343/1.0"));
718     }
719 
720     //multiline
721     {
722         auto req = parse("GET / HTTP/1.0\r\nfoo: \r\nfoo: b\r\n  \tc\r\n\r\n");
723         assert(req.method == "GET");
724         assert(req.uri == "/");
725         assert(req.minorVer == 0);
726         assert(req.headers.length == 3);
727         assert(req.headers[0] == Header("foo", ""));
728         assert(req.headers[1] == Header("foo", "b"));
729         assert(req.headers[2] == Header(null, "  \tc"));
730     }
731 
732     // header name with trailing space
733     parse("GET / HTTP/1.0\r\nfoo : ab\r\n\r\n", Test.err);
734 
735     // incomplete
736     assert(parse("\r", Test.partial).method == null);
737     assert(parse("\r\n", Test.partial).method == null);
738     assert(parse("\r\nGET", Test.partial).method == null);
739     assert(parse("GET", Test.partial).method == null);
740     assert(parse("GET ", Test.partial).method == "GET");
741     assert(parse("GET /", Test.partial).uri == null);
742     assert(parse("GET / ", Test.partial).uri == "/");
743     assert(parse("GET / HTTP/1.1", Test.partial).minorVer == 0);
744     assert(parse("GET / HTTP/1.1\r", Test.partial).minorVer == 1);
745     assert(parse("GET / HTTP/1.1\r\n", Test.partial).minorVer == 1);
746     parse("GET / HTTP/1.0\r\n\r", Test.partial);
747     parse("GET / HTTP/1.0\r\n\r\n", Test.complete);
748     parse(" / HTTP/1.0\r\n\r\n", Test.err); // empty method
749     parse("GET  HTTP/1.0\r\n\r\n", Test.err); // empty request target
750     parse("GET / \r\n\r\n", Test.err); // empty version
751     parse("GET / HTTP/1.0\r\n:a\r\n\r\n", Test.err); // empty header name
752     parse("GET / HTTP/1.0\r\n :a\r\n\r\n", Test.err); // empty header name (space only)
753     parse("G\0T / HTTP/1.0\r\n\r\n", Test.err); // NUL in method
754     parse("G\tT / HTTP/1.0\r\n\r\n", Test.err); // tab in method
755     parse("GET /\x7f HTTP/1.0\r\n\r\n", Test.err); // DEL in uri
756     parse("GET / HTTP/1.0\r\na\0b: c\r\n\r\n", Test.err); // NUL in header name
757     parse("GET / HTTP/1.0\r\nab: c\0d\r\n\r\n", Test.err); // NUL in header value
758     parse("GET / HTTP/1.0\r\na\033b: c\r\n\r\n", Test.err); // CTL in header name
759     parse("GET / HTTP/1.0\r\nab: c\033\r\n\r\n", Test.err); // CTL in header value
760     parse("GET / HTTP/1.0\r\n/: 1\r\n\r\n", Test.err); // invalid char in header value
761     parse("GET   /   HTTP/1.0\r\n\r\n", Test.complete); // multiple spaces between tokens
762 
763     // accept MSB chars
764     {
765         auto res = parse("GET /\xa0 HTTP/1.0\r\nh: c\xa2y\r\n\r\n");
766         assert(res.method == "GET");
767         assert(res.uri == "/\xa0");
768         assert(res.minorVer == 0);
769         assert(res.headers.length == 1);
770         assert(res.headers[0] == Header("h", "c\xa2y"));
771     }
772 
773     parse("GET / HTTP/1.0\r\n\x7b: 1\r\n\r\n", Test.err); // disallow '{'
774 
775     // exclude leading and trailing spaces in header value
776     {
777         auto req = parse("GET / HTTP/1.0\r\nfoo:  a \t \r\n\r\n");
778         assert(req.headers[0].value == "a");
779     }
780 
781     // leave the body intact
782     parse("GET / HTTP/1.0\r\n\r\nfoo bar baz", Test.complete, "foo bar baz".length);
783 
784     // realworld
785     {
786         auto req = parse("GET /cookies HTTP/1.1\r\nHost: 127.0.0.1:8090\r\nConnection: keep-alive\r\nCache-Control: max-age=0\r\nAccept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\nUser-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.56 Safari/537.17\r\nAccept-Encoding: gzip,deflate,sdch\r\nAccept-Language: en-US,en;q=0.8\r\nAccept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.3\r\nCookie: name=wookie\r\n\r\n");
787         assert(req.method == "GET");
788         assert(req.uri == "/cookies");
789         assert(req.minorVer == 1);
790         assert(req.headers[0] == Header("Host", "127.0.0.1:8090"));
791         assert(req.headers[1] == Header("Connection", "keep-alive"));
792         assert(req.headers[2] == Header("Cache-Control", "max-age=0"));
793         assert(req.headers[3] == Header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"));
794         assert(req.headers[4] == Header("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.56 Safari/537.17"));
795         assert(req.headers[5] == Header("Accept-Encoding", "gzip,deflate,sdch"));
796         assert(req.headers[6] == Header("Accept-Language", "en-US,en;q=0.8"));
797         assert(req.headers[7] == Header("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.3"));
798         assert(req.headers[8] == Header("Cookie", "name=wookie"));
799     }
800 
801     // newline
802     {
803         auto req = parse("GET / HTTP/1.0\nfoo: a\n\n");
804     }
805 }
806 
807 @("Response")
808 // Tests from https://github.com/h2o/picohttpparser/blob/master/test.c
809 unittest
810 {
811     auto parse(string data, Test test = Test.complete, int additional = 0) @safe nothrow
812     {
813         auto parser = initParser!Msg();
814 
815         auto res = parser.parseResponse(data);
816         // if (res < 0) writeln("Err: ", cast(ParserError)(-res));
817         final switch (test)
818         {
819             case Test.err: assert(res < -ParserError.partial); break;
820             case Test.partial: assert(res == -ParserError.partial); break;
821             case Test.complete: assert(res == data.length - additional); break;
822         }
823 
824         return parser.msg;
825     }
826 
827     // simple
828     {
829         auto res = parse("HTTP/1.0 200 OK\r\n\r\n");
830         assert(res.headers.length == 0);
831         assert(res.status == 200);
832         assert(res.minorVer == 0);
833         assert(res.statusMsg == "OK");
834     }
835 
836     parse("HTTP/1.0 200 OK\r\n\r", Test.partial); // partial
837 
838     // parse headers
839     {
840         auto res = parse("HTTP/1.1 200 OK\r\nHost: example.com\r\nCookie: \r\n\r\n");
841         assert(res.headers.length == 2);
842         assert(res.minorVer == 1);
843         assert(res.status == 200);
844         assert(res.statusMsg == "OK");
845         assert(res.headers[0] == Header("Host", "example.com"));
846         assert(res.headers[1] == Header("Cookie", ""));
847     }
848 
849     // parse multiline
850     {
851         auto res = parse("HTTP/1.0 200 OK\r\nfoo: \r\nfoo: b\r\n  \tc\r\n\r\n");
852         assert(res.headers.length == 3);
853         assert(res.minorVer == 0);
854         assert(res.status == 200);
855         assert(res.statusMsg == "OK");
856         assert(res.headers[0] == Header("foo", ""));
857         assert(res.headers[1] == Header("foo", "b"));
858         assert(res.headers[2] == Header(null, "  \tc"));
859     }
860 
861     // internal server error
862     {
863         auto res = parse("HTTP/1.0 500 Internal Server Error\r\n\r\n");
864         assert(res.headers.length == 0);
865         assert(res.minorVer == 0);
866         assert(res.status == 500);
867         assert(res.statusMsg == "Internal Server Error");
868     }
869 
870     parse("H", Test.partial); // incomplete 1
871     parse("HTTP/1.", Test.partial); // incomplete 2
872     assert(parse("HTTP/1.1", Test.partial).minorVer == 0); // incomplete 3 - differs from picohttpparser as we don't parse exact version
873     assert(parse("HTTP/1.1 ", Test.partial).minorVer == 1); // incomplete 4
874     parse("HTTP/1.1 2", Test.partial); // incomplete 5
875     assert(parse("HTTP/1.1 200", Test.partial).status == 0); // incomplete 6
876     assert(parse("HTTP/1.1 200 ", Test.partial).status == 200); // incomplete 7
877     assert(parse("HTTP/1.1 200\r", Test.partial).status == 200); // incomplete 7.1
878     parse("HTTP/1.1 200 O", Test.partial); // incomplete 8
879     assert(parse("HTTP/1.1 200 OK\r", Test.partial).statusMsg == "OK"); // incomplete 9 - differs from picohttpparser
880     assert(parse("HTTP/1.1 200 OK\r\n", Test.partial).statusMsg == "OK"); // incomplete 10
881     assert(parse("HTTP/1.1 200 OK\n", Test.partial).statusMsg == "OK"); // incomplete 11
882     assert(parse("HTTP/1.1 200 OK\r\nA: 1\r", Test.partial).headers.length == 0); // incomplete 11
883     parse("HTTP/1.1   200   OK\r\n\r\n", Test.complete); // multiple spaces between tokens
884 
885     // incomplete 12
886     {
887         auto res = parse("HTTP/1.1 200 OK\r\nA: 1\r\n", Test.partial);
888         assert(res.headers.length == 1);
889         assert(res.headers[0] == Header("A", "1"));
890     }
891 
892     // slowloris (incomplete)
893     {
894         auto parser = initParser!Msg();
895         assert(parser.parseResponse("HTTP/1.0 200 OK\r\n") == -ParserError.partial);
896         assert(parser.parseResponse("HTTP/1.0 200 OK\r\n\r") == -ParserError.partial);
897         assert(parser.parseResponse("HTTP/1.0 200 OK\r\n\r\nblabla") == "HTTP/1.0 200 OK\r\n\r\n".length);
898     }
899 
900     parse("HTTP/1. 200 OK\r\n\r\n", Test.err); // invalid http version
901     parse("HTTP/1.2z 200 OK\r\n\r\n", Test.err); // invalid http version 2
902     parse("HTTP/1.1  OK\r\n\r\n", Test.err); // no status code
903 
904     assert(parse("HTTP/1.1 200\r\n\r\n").statusMsg == ""); // accept missing trailing whitespace in status-line
905     parse("HTTP/1.1 200X\r\n\r\n", Test.err); // garbage after status 1
906     parse("HTTP/1.1 200X \r\n\r\n", Test.err); // garbage after status 2
907     parse("HTTP/1.1 200X OK\r\n\r\n", Test.err); // garbage after status 3
908 
909     assert(parse("HTTP/1.1 200 OK\r\nbar: \t b\t \t\r\n\r\n").headers[0].value == "b"); // exclude leading and trailing spaces in header value
910 }
911 
912 @("Incremental")
913 unittest
914 {
915     string req = "GET /cookies HTTP/1.1\r\nHost: 127.0.0.1:8090\r\nConnection: keep-alive\r\nCache-Control: max-age=0\r\nAccept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\nUser-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.56 Safari/537.17\r\nAccept-Encoding: gzip,deflate,sdch\r\nAccept-Language: en-US,en;q=0.8\r\nAccept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.3\r\nCookie: name=wookie\r\n\r\n";
916     auto parser = initParser!Msg();
917     uint parsed;
918     auto res = parser.parseRequest(req[0.."GET /cookies HTTP/1.1\r\nHost: 127.0.0.1:8090\r\nConn".length], parsed);
919     assert(res == -ParserError.partial);
920     assert(parser.msg.method == "GET");
921     assert(parser.msg.uri == "/cookies");
922     assert(parser.msg.minorVer == 1);
923     assert(parser.msg.headers.length == 1);
924     assert(parser.msg.headers[0] == Header("Host", "127.0.0.1:8090"));
925 
926     res = parser.parseRequest(req, parsed);
927     assert(res == req.length);
928     assert(parser.msg.method == "GET");
929     assert(parser.msg.uri == "/cookies");
930     assert(parser.msg.minorVer == 1);
931     assert(parser.msg.headers[0] == Header("Host", "127.0.0.1:8090"));
932     assert(parser.msg.headers[1] == Header("Connection", "keep-alive"));
933     assert(parser.msg.headers[2] == Header("Cache-Control", "max-age=0"));
934     assert(parser.msg.headers[3] == Header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"));
935     assert(parser.msg.headers[4] == Header("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.56 Safari/537.17"));
936     assert(parser.msg.headers[5] == Header("Accept-Encoding", "gzip,deflate,sdch"));
937     assert(parser.msg.headers[6] == Header("Accept-Language", "en-US,en;q=0.8"));
938     assert(parser.msg.headers[7] == Header("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.3"));
939     assert(parser.msg.headers[8] == Header("Cookie", "name=wookie"));
940 }
941 
942 //** used intrinsics **//
943 
944 version(LDC)
945 {
946     public import core.simd;
947     public import ldc.intrinsics;
948     import ldc.gccbuiltins_x86;
949 
950     enum LDC_with_SSE42 = __traits(targetHasFeature, "sse4.2");
951 
952     // These specify the type of data that we're comparing.
953     enum _SIDD_UBYTE_OPS            = 0x00;
954     enum _SIDD_UWORD_OPS            = 0x01;
955     enum _SIDD_SBYTE_OPS            = 0x02;
956     enum _SIDD_SWORD_OPS            = 0x03;
957 
958     // These specify the type of comparison operation.
959     enum _SIDD_CMP_EQUAL_ANY        = 0x00;
960     enum _SIDD_CMP_RANGES           = 0x04;
961     enum _SIDD_CMP_EQUAL_EACH       = 0x08;
962     enum _SIDD_CMP_EQUAL_ORDERED    = 0x0c;
963 
964     // These are used in _mm_cmpXstri() to specify the return.
965     enum _SIDD_LEAST_SIGNIFICANT    = 0x00;
966     enum _SIDD_MOST_SIGNIFICANT     = 0x40;
967 
968     // These macros are used in _mm_cmpXstri() to specify the return.
969     enum _SIDD_BIT_MASK             = 0x00;
970     enum _SIDD_UNIT_MASK            = 0x40;
971 
972     // some definition aliases to commonly used names
973     alias __m128i = int4;
974 
975     // some used methods aliases
976     alias _expect = llvm_expect;
977     alias _mm_loadu_si128 = loadUnaligned!__m128i;
978     alias _mm_cmpestri = __builtin_ia32_pcmpestri128;
979 }
980 else
981 {
982     enum LDC_with_SSE42 = false;
983 
984     T _expect(T)(T val, T expected_val) if (__traits(isIntegral, T))
985     {
986         pragma(inline, true);
987         return val;
988     }
989 }
990 
991 pragma(msg, "SSE: ", LDC_with_SSE42);