/**
 * Push parser of HTTP/RTSP message headers (request line / status line + header fields).
 *
 * The parser does not allocate and does not copy: it walks the provided buffer and hands slices
 * of it to optional `onMethod`, `onUri`, `onVersion`, `onStatus`, `onStatusMsg` and `onHeader`
 * callbacks of a user supplied message structure.
 */
module httparsed;

nothrow @safe @nogc:

/// Parser error codes
enum ParserError : int
{
    partial = 1,    /// not enough data to parse message
    newLine,        /// invalid character in new line
    headerName,     /// invalid character in header name
    headerValue,    /// invalid header value
    status,         /// invalid character in response status
    token,          /// invalid character in token
    noHeaderName,   /// empty header name
    noMethod,       /// no method in request line
    noVersion,      /// no version in request line / response status line
    noUri,          /// no URI in request line
    noStatus,       /// no status code or text in status line
    invalidMethod,  /// invalid method in request line
    invalidVersion, /// invalid version for the protocol message
}

/// Helper function to initialize message parser
auto initParser(MSG, Args...)(Args args) { return MsgParser!MSG(args); }

/**
 * HTTP/RTSP message parser.
 *
 * `MSG` is the structure that receives parsed parts of the message through its optional
 * `onXXX` callbacks. A callback may return `void`, or an integral value where a negative
 * value aborts the parsing and is returned to the caller as is.
 */
struct MsgParser(MSG)
{
    import std.traits : ForeachType, isArray, Unqual;

    /// Constructs the parser, forwarding `args` to the constructor of `MSG`
    this(Args...)(Args args)
    {
        this.msg = MSG(args);
    }

    /**
     * Parses message request (request line + headers).
     *
     * Params:
     *   - buffer = buffer to parse message from
     *   - lastPos = optional argument to store / pass previous position to which message was
     *               already parsed (speeds up parsing when message comes in parts)
     *
     * Returns:
     *   * parsed message header length when parsed successfully
     *   * `-ParserError` on error (ie. -1 when message header is not complete yet)
     */
    int parseRequest(T)(T buffer, ref uint lastPos)
        if (isArray!T && (is(Unqual!(ForeachType!T) == char) || is(Unqual!(ForeachType!T) == ubyte)))
    {
        static if (is(Unqual!(ForeachType!T) == char)) return parse!parseRequestLine(cast(const(ubyte)[])buffer, lastPos);
        else return parse!parseRequestLine(buffer, lastPos);
    }

    /// ditto
    int parseRequest(T)(T buffer)
        if (isArray!T && (is(Unqual!(ForeachType!T) == char) || is(Unqual!(ForeachType!T) == ubyte)))
    {
        uint lastPos;
        return parseRequest(buffer, lastPos);
    }

    /**
     * Parses message response (status line + headers).
     *
     * Params:
     *   - buffer = buffer to parse message from
     *   - lastPos = optional argument to store / pass previous position to which message was
     *               already parsed (speeds up parsing when message comes in parts)
     *
     * Returns:
     *   * parsed message header length when parsed successfully
     *   * `-ParserError` on error (ie. -1 when message header is not complete yet)
     */
    int parseResponse(T)(T buffer, ref uint lastPos)
        if (isArray!T && (is(Unqual!(ForeachType!T) == char) || is(Unqual!(ForeachType!T) == ubyte)))
    {
        static if (is(Unqual!(ForeachType!T) == char)) return parse!parseStatusLine(cast(const(ubyte)[])buffer, lastPos);
        else return parse!parseStatusLine(buffer, lastPos);
    }

    /// ditto
    int parseResponse(T)(T buffer)
        if (isArray!T && (is(Unqual!(ForeachType!T) == char) || is(Unqual!(ForeachType!T) == ubyte)))
    {
        uint lastPos;
        return parseResponse(buffer, lastPos);
    }

    /// Gets provided structure used during parsing
    ref MSG msg() return { return m_msg; }

    alias msg this;

private:

    // character map of valid characters for token, forbidden:
    //   0-SP, DEL, HT
    //   ()<>@,;:\"/[]?={}
    enum tokenRanges = "\0 \"\"(),,//:@[]{}\x7f\xff";
    enum tokenSSERanges = "\0 \"\"(),,//:@[]{\xff"; // merge of last range due to the SSE register size limit

    enum versionRanges = "\0-:@[`{\xff"; // allow only [A-Za-z0-9./] characters

    MSG m_msg;

    /*
     * Common driver: parses the start line with `pred` (only when nothing was parsed yet) and
     * then the header fields.
     *
     * `lastPos` is 0 until the start line is completely parsed, afterwards it holds the index
     * right after the last completely parsed line so that the next call can resume from there.
     *
     * Returns: length of the message header on success, `-ParserError` (or negative callback
     * result) otherwise.
     */
    int parse(alias pred)(const(ubyte)[] buffer, ref uint lastPos)
    {
        assert(buffer.length >= lastPos);
        immutable l = buffer.length;

        if (_expect(!lastPos, true))
        {
            if (_expect(!buffer.length, false)) return err(ParserError.partial);

            // skip first empty line (some clients add CRLF after POST content)
            // Note: lastPos must not be advanced here. If the start line turns out to be incomplete,
            // a non zero lastPos would make the resumed call skip the start line parsing completely.
            size_t skip;
            if (_expect(buffer[0] == '\r', false))
            {
                if (_expect(buffer.length == 1, false)) return err(ParserError.partial);
                if (_expect(buffer[1] != '\n', false)) return err(ParserError.newLine);
                skip = 2;
            }
            else if (_expect(buffer[0] == '\n', false)) skip = 1;
            buffer = buffer[skip..$];

            immutable res = pred(buffer);
            if (_expect(res < 0, false)) return res;

            lastPos = cast(uint)(l - buffer.length); // store index of last parsed line
        }
        else buffer = buffer[lastPos..$]; // skip already parsed lines

        immutable hdrRes = parseHeaders(buffer);
        lastPos = cast(uint)(l - buffer.length); // store index of last parsed line

        if (_expect(hdrRes < 0, false)) return hdrRes;
        return lastPos; // finished
    }

    /*
     * Parses header fields up to (and including) the empty line that terminates the message header.
     * `buffer` is advanced over every completely parsed line, so on error it starts with the
     * line that could not be parsed.
     *
     * Returns: 0 on success, `-ParserError` (or negative `onHeader` result) otherwise.
     */
    int parseHeaders(ref const(ubyte)[] buffer)
    {
        bool hasHeader;
        size_t start, i;
        const(ubyte)[] name, value;
        while (true)
        {
            // check for msg headers end
            if (_expect(buffer.length == 0, false)) return err(ParserError.partial);
            if (buffer[0] == '\r')
            {
                if (_expect(buffer.length == 1, false)) return err(ParserError.partial);
                if (_expect(buffer[1] != '\n', false)) return err(ParserError.newLine);

                buffer = buffer[2..$];
                return 0;
            }
            if (_expect(buffer[0] == '\n', false))
            {
                buffer = buffer[1..$];
                return 0;
            }

            if (!hasHeader || (buffer[i] != ' ' && buffer[i] != '\t'))
            {
                auto ret = parseToken!(tokenRanges, ':', tokenSSERanges)(buffer, i);
                if (_expect(ret < 0, false)) return ret;
                if (_expect(start == i, false)) return err(ParserError.noHeaderName);
                name = buffer[start..i]; // store header name
                i++; // move index after colon

                // skip over SP and HT
                for (;; ++i)
                {
                    if (_expect(i == buffer.length, false)) return err(ParserError.partial);
                    if (buffer[i] != ' ' && buffer[i] != '\t') break;
                }
                start = i;
            }
            else name = null; // multiline header

            // parse value
            auto ret = parseToken!("\0\010\012\037\177\177", "\r\n")(buffer, i);
            if (_expect(ret < 0, false)) return ret;
            value = buffer[start..i];
            mixin(advanceNewline);
            hasHeader = true; // flag to define that we can now accept multiline header values
            static if (__traits(hasMember, m_msg, "onHeader"))
            {
                // remove trailing SPs and HTABs
                if (_expect(value.length && (value[$-1] == ' ' || value[$-1] == '\t'), false))
                {
                    int j = cast(int)value.length - 2;
                    for (; j >= 0; --j)
                        if (!(value[j] == ' ' || value[j] == '\t'))
                            break;
                    value = value[0..j+1];
                }

                static if (is(typeof(m_msg.onHeader("", "")) == void))
                    m_msg.onHeader(cast(const(char)[])name, cast(const(char)[])value);
                else {
                    auto r = m_msg.onHeader(cast(const(char)[])name, cast(const(char)[])value);
                    if (_expect(r < 0, false)) return r;
                }
            }

            // header line completed -> advance buffer
            buffer = buffer[i..$];
            start = i = 0;
        }
        assert(0);
    }

    /*
     * Parses request line: `METHOD SP+ URI SP+ VERSION (CRLF | LF)`.
     * On success `buffer` is advanced after the request line.
     *
     * Returns: 0 on success, `-ParserError` (or negative callback result) otherwise.
     */
    auto parseRequestLine(ref const(ubyte)[] buffer)
    {
        size_t start, i;

        // METHOD
        auto ret = parseToken!(tokenRanges, ' ', tokenSSERanges)(buffer, i);
        if (_expect(ret < 0, false)) return ret;
        if (_expect(start == i, false)) return err(ParserError.noMethod);

        static if (__traits(hasMember, m_msg, "onMethod"))
        {
            static if (is(typeof(m_msg.onMethod("")) == void))
                m_msg.onMethod(cast(const(char)[])buffer[start..i]);
            else {
                auto r = m_msg.onMethod(cast(const(char)[])buffer[start..i]);
                if (_expect(r < 0, false)) return r;
            }
        }
        mixin(skipSpaces!(ParserError.noUri));
        start = i;

        // PATH
        ret = parseToken!("\000\040\177\177", ' ')(buffer, i);
        if (_expect(ret < 0, false)) return ret;
        static if (__traits(hasMember, m_msg, "onUri"))
        {
            static if (is(typeof(m_msg.onUri("")) == void))
                m_msg.onUri(cast(const(char)[])buffer[start..i]);
            else {
                auto ur = m_msg.onUri(cast(const(char)[])buffer[start..i]);
                if (_expect(ur < 0, false)) return ur;
            }
        }
        mixin(skipSpaces!(ParserError.noVersion));
        start = i;

        // VERSION
        ret = parseToken!(versionRanges, "\r\n")(buffer, i);
        if (_expect(ret < 0, false)) return ret;
        static if (__traits(hasMember, m_msg, "onVersion"))
        {
            static if (is(typeof(m_msg.onVersion("")) == void))
                m_msg.onVersion(cast(const(char)[])buffer[start..i]);
            else {
                auto vr = m_msg.onVersion(cast(const(char)[])buffer[start..i]);
                if (_expect(vr < 0, false)) return vr;
            }
        }
        mixin(advanceNewline);

        // advance buffer after the request line
        buffer = buffer[i..$];
        return 0;
    }

    /*
     * Parses status line: `VERSION SP+ 3DIGIT [SP+ MESSAGE] (CRLF | LF)`.
     * On success `buffer` is advanced after the status line.
     *
     * Returns: 0 on success, `-ParserError` (or negative callback result) otherwise.
     */
    auto parseStatusLine(ref const(ubyte)[] buffer)
    {
        size_t start, i;

        // VERSION
        auto ret = parseToken!(versionRanges, ' ')(buffer, i);
        if (_expect(ret < 0, false)) return ret;
        if (_expect(start == i, false)) return err(ParserError.noVersion);
        static if (__traits(hasMember, m_msg, "onVersion"))
        {
            static if (is(typeof(m_msg.onVersion("")) == void))
                m_msg.onVersion(cast(const(char)[])buffer[start..i]);
            else {
                auto r = m_msg.onVersion(cast(const(char)[])buffer[start..i]);
                if (_expect(r < 0, false)) return r;
            }
        }
        mixin(skipSpaces!(ParserError.noStatus));
        start = i;

        // STATUS CODE
        if (_expect(i+3 >= buffer.length, false))
            return err(ParserError.partial); // not enough data - we want at least [:digit:][:digit:][:digit:]<other char> to try to parse

        int code;
        foreach (j, m; [100, 10, 1])
        {
            if (buffer[i+j] < '0' || buffer[i+j] > '9') return err(ParserError.status);
            code += (buffer[i+j] - '0') * m;
        }
        i += 3;
        static if (__traits(hasMember, m_msg, "onStatus"))
        {
            static if (is(typeof(m_msg.onStatus(code)) == void))
                m_msg.onStatus(code);
            else {
                auto sr = m_msg.onStatus(code);
                if (_expect(sr < 0, false)) return sr;
            }
        }
        if (_expect(i == buffer.length, false))
            return err(ParserError.partial);
        if (_expect(buffer[i] != ' ' && buffer[i] != '\r' && buffer[i] != '\n', false))
            return err(ParserError.status); // Garbage after status

        start = i;

        // MESSAGE
        ret = parseToken!("\0\010\012\037\177\177", "\r\n")(buffer, i);
        if (_expect(ret < 0, false)) return ret;
        static if (__traits(hasMember, m_msg, "onStatusMsg"))
        {
            // remove preceding space (we didn't advance over spaces because of possibly missing status message)
            if (i > start)
            {
                // check the bound first, then look at the character
                while (start < i && buffer[start] == ' ') start++;
                if (i > start)
                {
                    static if (is(typeof(m_msg.onStatusMsg("")) == void))
                        m_msg.onStatusMsg(cast(const(char)[])buffer[start..i]);
                    else {
                        auto smr = m_msg.onStatusMsg(cast(const(char)[])buffer[start..i]);
                        if (_expect(smr < 0, false)) return smr;
                    }
                }
            }
        }
        mixin(advanceNewline);

        // advance buffer after the status line
        buffer = buffer[i..$];
        return 0;
    }

    /*
     * Advances buffer over the token to the next character while checking for valid characters.
     * On success, buffer index is left on the next character.
     *
     * Params:
     *   - ranges = ranges of characters to stop on
     *   - sseRanges = if null, same ranges is used, but they are limited to 8 ranges
     *   - next = next character/s to stop on (must be present in the provided ranges too)
     *   - buffer = data to scan
     *   - i = index to start the scan from, updated to the index of the character the scan stopped on
     * Returns: 0 on success, error code otherwise
     */
    int parseToken(string ranges, alias next, string sseRanges = null)(const(ubyte)[] buffer, ref size_t i) pure
    {
        version (DigitalMars) {
            static if (__VERSION__ >= 2094) pragma(inline, true); // older compilers can't inline this
        } else pragma(inline, true);

        immutable charMap = parseTokenCharMap!(ranges)();

        static if (LDC_with_SSE42)
        {
            // CT function to prepare input for SIMD vector enum
            static byte[16] padRanges()(string ranges)
            {
                byte[16] res;
                // res[0..ranges.length] = cast(byte[])ranges[]; - broken on macOS betterC tests
                foreach (i, c; ranges) res[i] = cast(byte)c;
                return res;
            }

            static if (sseRanges) alias usedRng = sseRanges;
            else alias usedRng = ranges;
            static assert(usedRng.length <= 16, "Ranges must be at most 16 characters long");
            static assert(usedRng.length % 2 == 0, "Ranges must have even number of characters");
            enum rangesSize = usedRng.length;
            enum byte16 rngE = padRanges(usedRng);

            if (_expect(buffer.length - i >= 16, true))
            {
                size_t left = (buffer.length - i) & ~15; // round down to multiple of 16
                byte16 ranges16 = rngE;

                do
                {
                    byte16 b16 = () @trusted { return cast(byte16)_mm_loadu_si128(cast(__m128i*)&buffer[i]); }();
                    immutable r = _mm_cmpestri(
                        ranges16, rangesSize,
                        b16, 16,
                        _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS
                    );

                    if (r != 16)
                    {
                        i += r;
                        goto FOUND;
                    }
                    i += 16;
                    left -= 16;
                }
                while (_expect(left != 0, true));
            }
        }
        else
        {
            // faster unrolled loop to iterate over 8 characters
            while (_expect(buffer.length - i >= 8, true))
            {
                static foreach (_; 0..8)
                {
                    if (_expect(!charMap[buffer[i]], false)) goto FOUND;
                    ++i;
                }
            }
        }

        // handle the rest
        if (_expect(i >= buffer.length, false)) return err(ParserError.partial);

        FOUND:
        while (true)
        {
            static if (is(typeof(next) == char)) {
                static assert(!charMap[next], "Next character is not in ranges");
                if (buffer[i] == next) return 0;
            } else {
                static assert(next.length > 0, "Next character not provided");
                static foreach (c; next) {
                    static assert(!charMap[c], "Next character is not in ranges");
                    if (buffer[i] == c) return 0;
                }
            }
            if (_expect(!charMap[buffer[i]], false)) return err(ParserError.token);
            if (_expect(++i == buffer.length, false)) return err(ParserError.partial);
        }
    }

    // advances over new line
    // (mixed in at places where `buffer[i]` is known to be '\r' or '\n')
    enum advanceNewline = q{
            assert(i < buffer.length);
            if (_expect(buffer[i] == '\r', true))
            {
                if (_expect(i+1 == buffer.length, false)) return err(ParserError.partial);
                if (_expect(buffer[i+1] != '\n', false)) return err(ParserError.newLine);
                i += 2;
            }
            else if (buffer[i] == '\n') ++i;
            else assert(0);
        };

    // skips over spaces in the buffer
    // (`err` is the error returned when the line ends right after the spaces)
    template skipSpaces(ParserError err)
    {
        enum skipSpaces = `
            do {
                ++i;
                if (_expect(buffer.length == i, false)) return err(ParserError.partial);
                if (_expect(buffer[i] == '\r' || buffer[i] == '\n', false)) return err(` ~ err.stringof ~ `);
            } while (buffer[i] == ' ');
        `;
    }
}

///
@("example")
unittest
{
    // init parser
    auto reqParser = initParser!Msg(); // or `MsgParser!MSG reqParser;`
    auto resParser = initParser!Msg(); // or `MsgParser!MSG resParser;`

    // parse request
    string data = "GET /foo HTTP/1.1\r\nHost: 127.0.0.1:8090\r\n\r\n";
    // returns parsed message header length when parsed successfully, -ParserError on error
    int res = reqParser.parseRequest(data);
    assert(res == data.length);
    assert(reqParser.method == "GET");
    assert(reqParser.uri == "/foo");
    assert(reqParser.minorVer == 1); // HTTP/1.1
    assert(reqParser.headers.length == 1);
    assert(reqParser.headers[0].name == "Host");
    assert(reqParser.headers[0].value == "127.0.0.1:8090");

    // parse response
    data = "HTTP/1.0 200 OK\r\n";
    uint lastPos; // store last parsed position for next run
    res = resParser.parseResponse(data, lastPos);
    assert(res == -ParserError.partial); // no complete message header yet
    data = "HTTP/1.0 200 OK\r\nContent-Type: text/plain\r\nContent-Length: 3\r\n\r\nfoo";
    res = resParser.parseResponse(data, lastPos); // starts parsing from previous position
    assert(res == data.length - 3); // whole message header parsed, body left to be handled based on actual header values
    assert(resParser.minorVer == 0); // HTTP/1.0
    assert(resParser.status == 200);
    assert(resParser.statusMsg == "OK");
    assert(resParser.headers.length == 2);
    assert(resParser.headers[0].name == "Content-Type");
    assert(resParser.headers[0].value == "text/plain");
    assert(resParser.headers[1].name == "Content-Length");
    assert(resParser.headers[1].value == "3");
}

/**
 * Parses HTTP version from a slice returned in `onVersion` callback.
 *
 * Returns: minor version (0 for HTTP/1.0 or 1 for HTTP/1.1) on success or
 *          `-ParserError.invalidVersion` on error
 */
int parseHttpVersion(const(char)[] ver) pure
{
    if (_expect(ver.length != 8, false)) return err(ParserError.invalidVersion);

    static foreach (i, c; "HTTP/1.")
        if (_expect(ver[i] != c, false)) return err(ParserError.invalidVersion);

    if (_expect(ver[7] < '0' || ver[7] > '9', false)) return err(ParserError.invalidVersion);
    return ver[7] - '0';
}

@("parseHttpVersion")
unittest
{
    assert(parseHttpVersion("FOO") < 0);
    assert(parseHttpVersion("HTTP/1.") < 0);
    assert(parseHttpVersion("HTTP/1.12") < 0);
    assert(parseHttpVersion("HTTP/1.a") < 0);
    assert(parseHttpVersion("HTTP/2.0") < 0);
    assert(parseHttpVersion("HTTP/1.00") < 0);
    assert(parseHttpVersion("HTTP/1.0") == 0);
    assert(parseHttpVersion("HTTP/1.1") == 1);
}

version (CI_MAIN)
{
    // workaround for dub not supporting unittests with betterC
    version (D_BetterC)
    {
        extern(C) void main() @trusted {
            import core.stdc.stdio;
            static foreach(u; __traits(getUnitTests, httparsed))
            {
                static if (__traits(getAttributes, u).length)
                    printf("unittest %s:%d | '" ~ __traits(getAttributes, u)[0] ~ "'\n", __traits(getLocation, u)[0].ptr, __traits(getLocation, u)[1]);
                else
                    printf("unittest %s:%d\n", __traits(getLocation, u)[0].ptr, __traits(getLocation, u)[1]);
                u();
            }
            debug printf("All unit tests have been run successfully.\n");
        }
    }
    else
    {
        void main()
        {
            version (unittest) {} // run automagically
            else
            {
                import core.stdc.stdio;

                // just a compilation test
                auto reqParser = initParser!Msg();
                auto resParser = initParser!Msg();

                string data = "GET /foo HTTP/1.1\r\nHost: 127.0.0.1:8090\r\n\r\n";
                int res = reqParser.parseRequest(data);
                assert(res == data.length);

                data = "HTTP/1.0 200 OK\r\nContent-Type: text/plain\r\nContent-Length: 3\r\n\r\nfoo";
                res = resParser.parseResponse(data);
                assert(res == data.length - 3);
                () @trusted { printf("Test app works\n"); }();
            }
        }
    }
}

private:

/// Converts the error code to the (negative) value returned from the parser functions
int err(ParserError e) pure { pragma(inline, true); return -(cast(int)e); }

/// Builds valid char map from the provided ranges of invalid ones
bool[256] buildValidCharMap()(string invalidRanges)
{
    assert(invalidRanges.length % 2 == 0, "Uneven ranges");
    bool[256] res = true;

    // ranges are pairs of inclusive bounds; `j` is an int so that an upper bound of 0xff terminates
    for (int i=0; i < invalidRanges.length; i+=2)
        for (int j=invalidRanges[i]; j <= invalidRanges[i+1]; ++j)
            res[j] = false;
    return res;
}

@("buildValidCharMap")
unittest
{
    string ranges = "\0 \"\"(),,//:@[]{{}}\x7f\xff";
    assert(buildValidCharMap(ranges) ==
        cast(bool[])[
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
            0,1,0,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
            0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,1,
            1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        ]);
}

/// Returns the valid char map for the provided invalid ranges (built once per set of ranges)
immutable(bool[256]) parseTokenCharMap(string invalidRanges)() {
    static immutable charMap = buildValidCharMap(invalidRanges);
    return charMap;
}

version (unittest) version = WITH_MSG;
else version (CI_MAIN) version = WITH_MSG;

version (WITH_MSG)
{
    // define our message content handler
    struct Header
    {
        const(char)[] name;
        const(char)[] value;
    }

    // Just store slices of parsed message header
    struct Msg
    {
        @safe pure nothrow @nogc:
        void onMethod(const(char)[] method) { this.method = method; }
        void onUri(const(char)[] uri) { this.uri = uri; }
        int onVersion(const(char)[] ver)
        {
            minorVer = parseHttpVersion(ver);
            return minorVer >= 0 ? 0 : minorVer;
        }
        void onHeader(const(char)[] name, const(char)[] value) {
            this.m_headers[m_headersLength].name = name;
            this.m_headers[m_headersLength++].value = value;
        }
        void onStatus(int status) { this.status = status; }
        void onStatusMsg(const(char)[] statusMsg) { this.statusMsg = statusMsg; }

        const(char)[] method;
        const(char)[] uri;
        int minorVer;
        int status;
        const(char)[] statusMsg;

        private {
            Header[32] m_headers;
            size_t m_headersLength;
        }

        Header[] headers() return { return m_headers[0..m_headersLength]; }
    }

    enum Test { err, complete, partial }
}

// Tests from https://github.com/h2o/picohttpparser/blob/master/test.c

@("Request")
unittest
{
    auto parse(string data, Test test = Test.complete, int additional = 0) @safe nothrow @nogc
    {
        auto parser = initParser!Msg();
        auto res = parser.parseRequest(data);
        // if (res < 0) writeln("Err: ", cast(ParserError)(-res));
        final switch (test)
        {
            case Test.err: assert(res < -ParserError.partial); break;
            case Test.partial: assert(res == -ParserError.partial); break;
            case Test.complete: assert(res == data.length - additional); break;
        }

        return parser.msg;
    }

    // simple
    {
        auto req = parse("GET / HTTP/1.0\r\n\r\n");
        assert(req.headers.length == 0);
        assert(req.method == "GET");
        assert(req.uri == "/");
        assert(req.minorVer == 0);
    }

    // parse headers
    {
        auto req = parse("GET /hoge HTTP/1.1\r\nHost: example.com\r\nCookie: \r\n\r\n");
        assert(req.method == "GET");
        assert(req.uri == "/hoge");
        assert(req.minorVer == 1);
        assert(req.headers.length == 2);
        assert(req.headers[0] == Header("Host", "example.com"));
        assert(req.headers[1] == Header("Cookie", ""));
    }

    // multibyte included
    {
        auto req = parse("GET /hoge HTTP/1.1\r\nHost: example.com\r\nUser-Agent: \343\201\262\343/1.0\r\n\r\n");
        assert(req.method == "GET");
        assert(req.uri == "/hoge");
        assert(req.minorVer == 1);
        assert(req.headers.length == 2);
        assert(req.headers[0] == Header("Host", "example.com"));
        assert(req.headers[1] == Header("User-Agent", "\343\201\262\343/1.0"));
    }

    //multiline
    {
        auto req = parse("GET / HTTP/1.0\r\nfoo: \r\nfoo: b\r\n \tc\r\n\r\n");
        assert(req.method == "GET");
        assert(req.uri == "/");
        assert(req.minorVer == 0);
        assert(req.headers.length == 3);
        assert(req.headers[0] == Header("foo", ""));
        assert(req.headers[1] == Header("foo", "b"));
        assert(req.headers[2] == Header(null, " \tc"));
    }

    // header name with trailing space
    parse("GET / HTTP/1.0\r\nfoo : ab\r\n\r\n", Test.err);

    // incomplete
    assert(parse("\r", Test.partial).method == null);
    assert(parse("\r\n", Test.partial).method == null);
    assert(parse("\r\nGET", Test.partial).method == null);
    assert(parse("GET", Test.partial).method == null);
    assert(parse("GET ", Test.partial).method == "GET");
    assert(parse("GET /", Test.partial).uri == null);
    assert(parse("GET / ", Test.partial).uri == "/");
    assert(parse("GET / HTTP/1.1", Test.partial).minorVer == 0);
    assert(parse("GET / HTTP/1.1\r", Test.partial).minorVer == 1);
    assert(parse("GET / HTTP/1.1\r\n", Test.partial).minorVer == 1);
    parse("GET / HTTP/1.0\r\n\r", Test.partial);
    parse("GET / HTTP/1.0\r\n\r\n", Test.complete);
    parse(" / HTTP/1.0\r\n\r\n", Test.err); // empty method
    parse("GET HTTP/1.0\r\n\r\n", Test.err); // empty request target
    parse("GET / \r\n\r\n", Test.err); // empty version
    parse("GET / HTTP/1.0\r\n:a\r\n\r\n", Test.err); // empty header name
    parse("GET / HTTP/1.0\r\n :a\r\n\r\n", Test.err); // empty header name (space only)
    parse("G\0T / HTTP/1.0\r\n\r\n", Test.err); // NUL in method
    parse("G\tT / HTTP/1.0\r\n\r\n", Test.err); // tab in method
    parse("GET /\x7f HTTP/1.0\r\n\r\n", Test.err); // DEL in uri
    parse("GET / HTTP/1.0\r\na\0b: c\r\n\r\n", Test.err); // NUL in header name
    parse("GET / HTTP/1.0\r\nab: c\0d\r\n\r\n", Test.err); // NUL in header value
    parse("GET / HTTP/1.0\r\na\033b: c\r\n\r\n", Test.err); // CTL in header name
    parse("GET / HTTP/1.0\r\nab: c\033\r\n\r\n", Test.err); // CTL in header value
    parse("GET / HTTP/1.0\r\n/: 1\r\n\r\n", Test.err); // invalid char in header value
    parse("GET   /   HTTP/1.0\r\n\r\n", Test.complete); // multiple spaces between tokens

    // accept MSB chars
    {
        auto res = parse("GET /\xa0 HTTP/1.0\r\nh: c\xa2y\r\n\r\n");
        assert(res.method == "GET");
        assert(res.uri == "/\xa0");
        assert(res.minorVer == 0);
        assert(res.headers.length == 1);
        assert(res.headers[0] == Header("h", "c\xa2y"));
    }

    parse("GET / HTTP/1.0\r\n\x7b: 1\r\n\r\n", Test.err); // disallow '{'

    // exclude leading and trailing spaces in header value
    {
        auto req = parse("GET / HTTP/1.0\r\nfoo: a \t \r\n\r\n");
        assert(req.headers[0].value == "a");
    }

    // leave the body intact
    parse("GET / HTTP/1.0\r\n\r\nfoo bar baz", Test.complete, "foo bar baz".length);

    // realworld
    {
        auto req = parse("GET /cookies HTTP/1.1\r\nHost: 127.0.0.1:8090\r\nConnection: keep-alive\r\nCache-Control: max-age=0\r\nAccept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\nUser-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.56 Safari/537.17\r\nAccept-Encoding: gzip,deflate,sdch\r\nAccept-Language: en-US,en;q=0.8\r\nAccept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.3\r\nCookie: name=wookie\r\n\r\n");
        assert(req.method == "GET");
        assert(req.uri == "/cookies");
        assert(req.minorVer == 1);
        assert(req.headers[0] == Header("Host", "127.0.0.1:8090"));
        assert(req.headers[1] == Header("Connection", "keep-alive"));
        assert(req.headers[2] == Header("Cache-Control", "max-age=0"));
        assert(req.headers[3] == Header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"));
        assert(req.headers[4] == Header("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.56 Safari/537.17"));
        assert(req.headers[5] == Header("Accept-Encoding", "gzip,deflate,sdch"));
        assert(req.headers[6] == Header("Accept-Language", "en-US,en;q=0.8"));
        assert(req.headers[7] == Header("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.3"));
        assert(req.headers[8] == Header("Cookie", "name=wookie"));
    }

    // newline
    {
        auto req = parse("GET / HTTP/1.0\nfoo: a\n\n");
    }
}

@("Response")
// Tests from https://github.com/h2o/picohttpparser/blob/master/test.c
unittest
{
    auto parse(string data, Test test = Test.complete, int additional = 0) @safe nothrow
    {
        auto parser = initParser!Msg();

        auto res = parser.parseResponse(data);
        // if (res < 0) writeln("Err: ", cast(ParserError)(-res));
        final switch (test)
        {
            case Test.err: assert(res < -ParserError.partial); break;
            case Test.partial: assert(res == -ParserError.partial); break;
            case Test.complete: assert(res == data.length - additional); break;
        }

        return parser.msg;
    }

    // simple
    {
        auto res = parse("HTTP/1.0 200 OK\r\n\r\n");
        assert(res.headers.length == 0);
        assert(res.status == 200);
        assert(res.minorVer == 0);
        assert(res.statusMsg == "OK");
    }

    parse("HTTP/1.0 200 OK\r\n\r", Test.partial); // partial

    // parse headers
    {
        auto res = parse("HTTP/1.1 200 OK\r\nHost: example.com\r\nCookie: \r\n\r\n");
        assert(res.headers.length == 2);
        assert(res.minorVer == 1);
        assert(res.status == 200);
        assert(res.statusMsg == "OK");
        assert(res.headers[0] == Header("Host", "example.com"));
        assert(res.headers[1] == Header("Cookie", ""));
    }

    // parse multiline
    {
        auto res = parse("HTTP/1.0 200 OK\r\nfoo: \r\nfoo: b\r\n \tc\r\n\r\n");
        assert(res.headers.length == 3);
        assert(res.minorVer == 0);
        assert(res.status == 200);
        assert(res.statusMsg == "OK");
        assert(res.headers[0] == Header("foo", ""));
        assert(res.headers[1] == Header("foo", "b"));
        assert(res.headers[2] == Header(null, " \tc"));
    }

    // internal server error
    {
        auto res = parse("HTTP/1.0 500 Internal Server Error\r\n\r\n");
        assert(res.headers.length == 0);
        assert(res.minorVer == 0);
        assert(res.status == 500);
        assert(res.statusMsg == "Internal Server Error");
    }

    parse("H", Test.partial); // incomplete 1
    parse("HTTP/1.", Test.partial); // incomplete 2
    assert(parse("HTTP/1.1", Test.partial).minorVer == 0); // incomplete 3 - differs from picohttpparser as we don't parse exact version
    assert(parse("HTTP/1.1 ", Test.partial).minorVer == 1); // incomplete 4
    parse("HTTP/1.1 2", Test.partial); // incomplete 5
    assert(parse("HTTP/1.1 200", Test.partial).status == 0); // incomplete 6
    assert(parse("HTTP/1.1 200 ", Test.partial).status == 200); // incomplete 7
    assert(parse("HTTP/1.1 200\r", Test.partial).status == 200); // incomplete 7.1
    parse("HTTP/1.1 200 O", Test.partial); // incomplete 8
    assert(parse("HTTP/1.1 200 OK\r", Test.partial).statusMsg == "OK"); // incomplete 9 - differs from picohttpparser
    assert(parse("HTTP/1.1 200 OK\r\n", Test.partial).statusMsg == "OK"); // incomplete 10
    assert(parse("HTTP/1.1 200 OK\n", Test.partial).statusMsg == "OK"); // incomplete 11
    assert(parse("HTTP/1.1 200 OK\r\nA: 1\r", Test.partial).headers.length == 0); // incomplete 11
    parse("HTTP/1.1   200   OK\r\n\r\n", Test.complete); // multiple spaces between tokens

    // incomplete 12
    {
        auto res = parse("HTTP/1.1 200 OK\r\nA: 1\r\n", Test.partial);
        assert(res.headers.length == 1);
        assert(res.headers[0] == Header("A", "1"));
    }

    // slowloris (incomplete)
    {
        auto parser = initParser!Msg();
        assert(parser.parseResponse("HTTP/1.0 200 OK\r\n") == -ParserError.partial);
        assert(parser.parseResponse("HTTP/1.0 200 OK\r\n\r") == -ParserError.partial);
        assert(parser.parseResponse("HTTP/1.0 200 OK\r\n\r\nblabla") == "HTTP/1.0 200 OK\r\n\r\n".length);
    }

    parse("HTTP/1. 200 OK\r\n\r\n", Test.err); // invalid http version
    parse("HTTP/1.2z 200 OK\r\n\r\n", Test.err); // invalid http version 2
    parse("HTTP/1.1 OK\r\n\r\n", Test.err); // no status code

    assert(parse("HTTP/1.1 200\r\n\r\n").statusMsg == ""); // accept missing trailing whitespace in status-line
    parse("HTTP/1.1 200X\r\n\r\n", Test.err); // garbage after status 1
    parse("HTTP/1.1 200X \r\n\r\n", Test.err); // garbage after status 2
    parse("HTTP/1.1 200X OK\r\n\r\n", Test.err); // garbage after status 3

    assert(parse("HTTP/1.1 200 OK\r\nbar: \t b\t \t\r\n\r\n").headers[0].value == "b"); // exclude leading and trailing spaces in header value
}

@("Incremental")
unittest
{
    string req = "GET /cookies HTTP/1.1\r\nHost: 127.0.0.1:8090\r\nConnection: keep-alive\r\nCache-Control: max-age=0\r\nAccept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\nUser-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.56 Safari/537.17\r\nAccept-Encoding: gzip,deflate,sdch\r\nAccept-Language: en-US,en;q=0.8\r\nAccept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.3\r\nCookie: name=wookie\r\n\r\n";
    auto parser = initParser!Msg();
    uint parsed;
    auto res = parser.parseRequest(req[0.."GET /cookies HTTP/1.1\r\nHost: 127.0.0.1:8090\r\nConn".length], parsed);
    assert(res == -ParserError.partial);
    assert(parser.msg.method == "GET");
    assert(parser.msg.uri == "/cookies");
    assert(parser.msg.minorVer == 1);
    assert(parser.msg.headers.length == 1);
    assert(parser.msg.headers[0] == Header("Host", "127.0.0.1:8090"));

    res = parser.parseRequest(req, parsed);
    assert(res == req.length);
    assert(parser.msg.method == "GET");
    assert(parser.msg.uri == "/cookies");
    assert(parser.msg.minorVer == 1);
    assert(parser.msg.headers[0] == Header("Host", "127.0.0.1:8090"));
    assert(parser.msg.headers[1] == Header("Connection", "keep-alive"));
    assert(parser.msg.headers[2] == Header("Cache-Control", "max-age=0"));
    assert(parser.msg.headers[3] == Header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"));
    assert(parser.msg.headers[4] == Header("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.56 Safari/537.17"));
    assert(parser.msg.headers[5] == Header("Accept-Encoding", "gzip,deflate,sdch"));
    assert(parser.msg.headers[6] == Header("Accept-Language", "en-US,en;q=0.8"));
    assert(parser.msg.headers[7] == Header("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.3"));
    assert(parser.msg.headers[8] == Header("Cookie", "name=wookie"));
}

@("Incremental with leading empty line")
unittest
{
    // regression: skipped leading CRLF must not be stored in the position when the start line is incomplete,
    // otherwise the resumed call would try to parse the start line as a header
    string req = "\r\nGET / HTTP/1.1\r\nHost: example.com\r\n\r\n";
    auto parser = initParser!Msg();
    uint parsed;
    auto res = parser.parseRequest(req[0.."\r\nGET".length], parsed);
    assert(res == -ParserError.partial);
    assert(parsed == 0);

    res = parser.parseRequest(req, parsed);
    assert(res == req.length);
    assert(parsed == req.length);
    assert(parser.msg.method == "GET");
    assert(parser.msg.uri == "/");
    assert(parser.msg.minorVer == 1);
    assert(parser.msg.headers.length == 1);
    assert(parser.msg.headers[0] == Header("Host", "example.com"));

    // same with the LF only variant and a response
    string resp = "\nHTTP/1.1 200 OK\r\n\r\n";
    auto respParser = initParser!Msg();
    parsed = 0;
    res = respParser.parseResponse(resp[0.."\nHTTP/1.1 2".length], parsed);
    assert(res == -ParserError.partial);
    assert(parsed == 0);
    res = respParser.parseResponse(resp, parsed);
    assert(res == resp.length);
    assert(respParser.msg.status == 200);
    assert(respParser.msg.statusMsg == "OK");
}

//** used intrinsics **//

version(LDC)
{
    public import core.simd;
    public import ldc.intrinsics;
    import ldc.gccbuiltins_x86;

    enum LDC_with_SSE42 = __traits(targetHasFeature, "sse4.2");

    // These specify the type of data that we're comparing.
    enum _SIDD_UBYTE_OPS            = 0x00;
    enum _SIDD_UWORD_OPS            = 0x01;
    enum _SIDD_SBYTE_OPS            = 0x02;
    enum _SIDD_SWORD_OPS            = 0x03;

    // These specify the type of comparison operation.
    enum _SIDD_CMP_EQUAL_ANY        = 0x00;
    enum _SIDD_CMP_RANGES           = 0x04;
    enum _SIDD_CMP_EQUAL_EACH       = 0x08;
    enum _SIDD_CMP_EQUAL_ORDERED    = 0x0c;

    // These are used in _mm_cmpXstri() to specify the return.
    enum _SIDD_LEAST_SIGNIFICANT    = 0x00;
    enum _SIDD_MOST_SIGNIFICANT     = 0x40;

    // These macros are used in _mm_cmpXstri() to specify the return.
    enum _SIDD_BIT_MASK             = 0x00;
    enum _SIDD_UNIT_MASK            = 0x40;

    // some definition aliases to commonly used names
    alias __m128i = int4;

    // some used methods aliases
    alias _expect = llvm_expect;
    alias _mm_loadu_si128 = loadUnaligned!__m128i;
    alias _mm_cmpestri = __builtin_ia32_pcmpestri128;
}
else
{
    enum LDC_with_SSE42 = false;

    // branch prediction hint is available only with LDC, just pass the value through elsewhere
    T _expect(T)(T val, T expected_val) if (__traits(isIntegral, T))
    {
        pragma(inline, true);
        return val;
    }
}

pragma(msg, "SSE: ", LDC_with_SSE42);