/**
 * HTTP/RTSP message header parser.
 *
 * The parser does not store anything by itself. It calls optional `onXXX` callbacks
 * of the user provided message structure with slices of the parsed input buffer.
 */
module httparsed;

nothrow @safe @nogc:

/// Parser error codes
enum ParserError : int
{
    partial = 1,    /// not enough data to parse message
    newLine,        /// invalid character in new line
    headerName,     /// invalid character in header name
    status,         /// invalid character in response status
    token,          /// invalid character in token
    noHeaderName,   /// empty header name
    noMethod,       /// no method in request line
    noVersion,      /// no version in request line / response status line
    noUri,          /// no URI in request line
    noStatus,       /// no status code or text in status line
    invalidMethod,  /// invalid method in request line
    invalidVersion, /// invalid version for the protocol message
}

/// Helper function to initialize message parser
auto initParser(MSG, Args...)(Args args) { return MsgParser!MSG(args); }

/**
 * HTTP/RTSP message parser.
 *
 * `MSG` is the user structure that receives the parsed parts. These callbacks are used
 * when `MSG` declares them: `onMethod`, `onUri`, `onVersion`, `onStatus`, `onStatusMsg`
 * and `onHeader`. A callback may return `void`, or an integer in which case a negative
 * value aborts the parsing and is returned to the caller.
 */
struct MsgParser(MSG)
{
    import std.traits : ForeachType, isArray, Unqual;

    /// Constructs the wrapped message structure from the provided arguments
    this(Args...)(Args args)
    {
        this.msg = MSG(args);
    }

    /**
     * Parses message request (request line + headers).
     *
     * Params:
     *   - buffer = buffer to parse message from
     *   - lastPos = optional argument to store / pass previous position to which message was
     *               already parsed (speeds up parsing when message comes in parts)
     *
     * Returns:
     *   * parsed message header length when parsed successfully
     *   * negated `ParserError` value on error (ie. `-ParserError.partial`, that is -1, when
     *     message header is not complete yet)
     */
    int parseRequest(T)(T buffer, ref uint lastPos)
        if (isArray!T && (is(Unqual!(ForeachType!T) == char) || is(Unqual!(ForeachType!T) == ubyte)))
    {
        static if (is(Unqual!(ForeachType!T) == char)) return parse!parseRequestLine(cast(const(ubyte)[])buffer, lastPos);
        else return parse!parseRequestLine(buffer, lastPos);
    }

    /// ditto
    int parseRequest(T)(T buffer)
        if (isArray!T && (is(Unqual!(ForeachType!T) == char) || is(Unqual!(ForeachType!T) == ubyte)))
    {
        uint lastPos; // throw away position, whole buffer is always parsed from the start
        return parseRequest(buffer, lastPos);
    }

    /**
     * Parses message response (status line + headers).
     *
     * Params:
     *   - buffer = buffer to parse message from
     *   - lastPos = optional argument to store / pass previous position to which message was
     *               already parsed (speeds up parsing when message comes in parts)
     *
     * Returns:
     *   * parsed message header length when parsed successfully
     *   * negated `ParserError` value on error (ie. `-ParserError.partial`, that is -1, when
     *     message header is not complete yet)
     */
    int parseResponse(T)(T buffer, ref uint lastPos)
        if (isArray!T && (is(Unqual!(ForeachType!T) == char) || is(Unqual!(ForeachType!T) == ubyte)))
    {
        static if (is(Unqual!(ForeachType!T) == char)) return parse!parseStatusLine(cast(const(ubyte)[])buffer, lastPos);
        else return parse!parseStatusLine(buffer, lastPos);
    }

    /// ditto
    int parseResponse(T)(T buffer)
        if (isArray!T && (is(Unqual!(ForeachType!T) == char) || is(Unqual!(ForeachType!T) == ubyte)))
    {
        uint lastPos; // throw away position, whole buffer is always parsed from the start
        return parseResponse(buffer, lastPos);
    }

    /// Gets provided structure used during parsing
    ref MSG msg() return { return m_msg; }

    alias msg this;

private:

    // character map of valid characters for token, forbidden:
    //   0-SP, DEL, HT
    //   ()<>@,;:\"/[]?={}
    enum tokenRanges = "\0 \"\"(),,//:@[]{}\x7f\xff";
    enum tokenSSERanges = "\0 \"\"(),,//:@[]{\xff"; // merge of last range due to the SSE register size limit

    enum versionRanges = "\0-:@[`{\xff"; // allow only [A-Za-z./] characters

    MSG m_msg;

    /*
     * Common driver for requests and responses.
     *
     * When `lastPos` is 0 the start line is parsed using `pred` first, otherwise parsing
     * continues with headers from the `lastPos` index.
     *
     * `lastPos` is updated only after the start line (and later each header line) has been
     * completely parsed. It must stay 0 while the start line is incomplete - even when a
     * leading empty line was skipped - otherwise the next call with more data would skip
     * the start line and try to parse it as a header.
     */
    int parse(alias pred)(const(ubyte)[] buffer, ref uint lastPos)
    {
        assert(buffer.length >= lastPos);
        immutable l = buffer.length;

        if (_expect(!lastPos, true))
        {
            if (_expect(!buffer.length, false)) return err(ParserError.partial);

            // skip first empty line (some clients add CRLF after POST content)
            if (_expect(buffer[0] == '\r', false))
            {
                if (_expect(buffer.length == 1, false)) return err(ParserError.partial);
                if (_expect(buffer[1] != '\n', false)) return err(ParserError.newLine);
                buffer = buffer[2..$];
            }
            else if (_expect(buffer[0] == '\n', false))
                buffer = buffer[1..$];

            immutable res = pred(buffer);
            if (_expect(res < 0, false)) return res; // lastPos is still 0 here

            lastPos = cast(uint)(l - buffer.length); // store index of last parsed line
        }
        else buffer = buffer[lastPos..$]; // skip already parsed lines

        immutable hdrRes = parseHeaders(buffer);
        lastPos = cast(uint)(l - buffer.length); // store index of last parsed line

        if (_expect(hdrRes < 0, false)) return hdrRes;
        return cast(int)lastPos; // finished
    }

    /*
     * Parses header lines up to (and including) the empty line that ends the message header.
     * The buffer is advanced after each completely parsed header line.
     */
    int parseHeaders(ref const(ubyte)[] buffer)
    {
        bool hasHeader;
        size_t start, i;
        const(ubyte)[] name, value;
        while (true)
        {
            // check for msg headers end
            if (_expect(buffer.length == 0, false)) return err(ParserError.partial);
            if (buffer[0] == '\r')
            {
                if (_expect(buffer.length == 1, false)) return err(ParserError.partial);
                if (_expect(buffer[1] != '\n', false)) return err(ParserError.newLine);

                buffer = buffer[2..$];
                return 0;
            }
            if (_expect(buffer[0] == '\n', false))
            {
                buffer = buffer[1..$];
                return 0;
            }

            if (!hasHeader || (buffer[i] != ' ' && buffer[i] != '\t'))
            {
                auto ret = parseToken!(tokenRanges, ':', tokenSSERanges)(buffer, i);
                if (_expect(ret < 0, false)) return ret;
                if (_expect(start == i, false)) return err(ParserError.noHeaderName);
                name = buffer[start..i]; // store header name
                i++; // move index after colon

                // skip over SP and HT
                for (;; ++i)
                {
                    if (_expect(i == buffer.length, false)) return err(ParserError.partial);
                    if (buffer[i] != ' ' && buffer[i] != '\t') break;
                }
                start = i;
            }
            else name = null; // multiline header

            // parse value
            auto ret = parseToken!("\0\010\012\037\177\177", "\r\n")(buffer, i);
            if (_expect(ret < 0, false)) return ret;
            value = buffer[start..i];
            mixin(advanceNewline);
            hasHeader = true; // flag to define that we can now accept multiline header values
            static if (__traits(hasMember, m_msg, "onHeader"))
            {
                // remove trailing SPs and HTABs
                if (_expect(value.length && (value[$-1] == ' ' || value[$-1] == '\t'), false))
                {
                    int j = cast(int)value.length - 2;
                    for (; j >= 0; --j)
                        if (!(value[j] == ' ' || value[j] == '\t'))
                            break;
                    value = value[0..j+1];
                }

                static if (is(typeof(m_msg.onHeader("", "")) == void))
                    m_msg.onHeader(cast(const(char)[])name, cast(const(char)[])value);
                else {
                    auto r = m_msg.onHeader(cast(const(char)[])name, cast(const(char)[])value);
                    if (_expect(r < 0, false)) return r;
                }
            }

            // header line completed -> advance buffer
            buffer = buffer[i..$];
            start = i = 0;
        }
        assert(0);
    }

    /*
     * Parses request line (METHOD, URI and VERSION separated with spaces).
     * On success the buffer is advanced after the request line.
     */
    auto parseRequestLine(ref const(ubyte)[] buffer)
    {
        size_t start, i;

        // METHOD
        auto ret = parseToken!(tokenRanges, ' ', tokenSSERanges)(buffer, i);
        if (_expect(ret < 0, false)) return ret;
        if (_expect(start == i, false)) return err(ParserError.noMethod);

        static if (__traits(hasMember, m_msg, "onMethod"))
        {
            static if (is(typeof(m_msg.onMethod("")) == void))
                m_msg.onMethod(cast(const(char)[])buffer[start..i]);
            else {
                auto r = m_msg.onMethod(cast(const(char)[])buffer[start..i]);
                if (_expect(r < 0, false)) return r;
            }
        }
        mixin(skipSpaces!(ParserError.noUri));
        start = i;

        // PATH
        ret = parseToken!("\000\040\177\177", ' ')(buffer, i);
        if (_expect(ret < 0, false)) return ret;
        static if (__traits(hasMember, m_msg, "onUri"))
        {
            static if (is(typeof(m_msg.onUri("")) == void))
                m_msg.onUri(cast(const(char)[])buffer[start..i]);
            else {
                auto ur = m_msg.onUri(cast(const(char)[])buffer[start..i]);
                if (_expect(ur < 0, false)) return ur;
            }
        }
        mixin(skipSpaces!(ParserError.noVersion));
        start = i;

        // VERSION
        ret = parseToken!(versionRanges, "\r\n")(buffer, i);
        if (_expect(ret < 0, false)) return ret;
        static if (__traits(hasMember, m_msg, "onVersion"))
        {
            static if (is(typeof(m_msg.onVersion("")) == void))
                m_msg.onVersion(cast(const(char)[])buffer[start..i]);
            else {
                auto vr = m_msg.onVersion(cast(const(char)[])buffer[start..i]);
                if (_expect(vr < 0, false)) return vr;
            }
        }
        mixin(advanceNewline);

        // advance buffer after the request line
        buffer = buffer[i..$];
        return 0;
    }

    /*
     * Parses response status line (VERSION, 3 digit STATUS CODE and optional MESSAGE).
     * On success the buffer is advanced after the status line.
     */
    auto parseStatusLine(ref const(ubyte)[] buffer)
    {
        size_t start, i;

        // VERSION
        auto ret = parseToken!(versionRanges, ' ')(buffer, i);
        if (_expect(ret < 0, false)) return ret;
        if (_expect(start == i, false)) return err(ParserError.noVersion);
        static if (__traits(hasMember, m_msg, "onVersion"))
        {
            static if (is(typeof(m_msg.onVersion("")) == void))
                m_msg.onVersion(cast(const(char)[])buffer[start..i]);
            else {
                auto r = m_msg.onVersion(cast(const(char)[])buffer[start..i]);
                if (_expect(r < 0, false)) return r;
            }
        }
        mixin(skipSpaces!(ParserError.noStatus));
        start = i;

        // STATUS CODE
        if (_expect(i+3 >= buffer.length, false))
            return err(ParserError.partial); // not enough data - we want at least [:digit:][:digit:][:digit:]<other char> to try to parse

        int code;
        foreach (j, m; [100, 10, 1])
        {
            if (buffer[i+j] < '0' || buffer[i+j] > '9') return err(ParserError.status);
            code += (buffer[i+j] - '0') * m;
        }
        i += 3;
        static if (__traits(hasMember, m_msg, "onStatus"))
        {
            static if (is(typeof(m_msg.onStatus(code)) == void))
                m_msg.onStatus(code);
            else {
                auto sr = m_msg.onStatus(code);
                if (_expect(sr < 0, false)) return sr;
            }
        }

        // the length check before the status code guarantees that buffer[i] is valid here
        if (_expect(buffer[i] != ' ' && buffer[i] != '\r' && buffer[i] != '\n', false))
            return err(ParserError.status); // Garbage after status

        start = i;

        // MESSAGE
        ret = parseToken!("\0\010\012\037\177\177", "\r\n")(buffer, i);
        if (_expect(ret < 0, false)) return ret;
        static if (__traits(hasMember, m_msg, "onStatusMsg"))
        {
            // remove preceding space (we didn't advance over spaces because of possibly missing status message)
            if (i > start)
            {
                while (start < i && buffer[start] == ' ') start++;
                if (i > start)
                {
                    static if (is(typeof(m_msg.onStatusMsg("")) == void))
                        m_msg.onStatusMsg(cast(const(char)[])buffer[start..i]);
                    else {
                        auto smr = m_msg.onStatusMsg(cast(const(char)[])buffer[start..i]);
                        if (_expect(smr < 0, false)) return smr;
                    }
                }
            }
        }
        mixin(advanceNewline);

        // advance buffer after the status line
        buffer = buffer[i..$];
        return 0;
    }

    /*
     * Advances buffer over the token to the next character while checking for valid characters.
     * On success, buffer index is left on the next character.
     *
     * Params:
     *   - ranges = ranges of characters to stop on
     *   - sseRanges = if null, same ranges is used, but they are limited to 8 ranges
     *   - next  = next character/s to stop on (must be present in the provided ranges too)
     * Returns: 0 on success error code otherwise
     */
    int parseToken(string ranges, alias next, string sseRanges = null)(const(ubyte)[] buffer, ref size_t i) pure
    {
        version (DigitalMars) {
            static if (__VERSION__ >= 2094) pragma(inline, true); // older compilers can't inline this
        } else pragma(inline, true);

        static immutable charMap = buildValidCharMap(ranges);

        static if (LDC_with_SSE42)
        {
            // CT function to prepare input for SIMD vector enum
            static byte[16] padRanges()(string ranges)
            {
                byte[16] res;
                // res[0..ranges.length] = cast(byte[])ranges[]; - broken on macOS betterC tests
                foreach (i, c; ranges) res[i] = cast(byte)c;
                return res;
            }

            static if (sseRanges) alias usedRng = sseRanges;
            else alias usedRng = ranges;
            static assert(usedRng.length <= 16, "Ranges must be at most 16 characters long");
            static assert(usedRng.length % 2 == 0, "Ranges must have even number of characters");
            enum rangesSize = usedRng.length;
            enum byte16 rngE = padRanges(usedRng);

            if (_expect(buffer.length - i >= 16, true))
            {
                size_t left = (buffer.length - i) & ~15; // round down to multiple of 16
                byte16 ranges16 = rngE;

                do
                {
                    byte16 b16 = () @trusted { return cast(byte16)_mm_loadu_si128(cast(__m128i*)&buffer[i]); }();
                    immutable r = _mm_cmpestri(
                        ranges16, rangesSize,
                        b16, 16,
                        _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS
                    );

                    if (r != 16)
                    {
                        i += r;
                        goto FOUND;
                    }
                    i += 16;
                    left -= 16;
                }
                while (_expect(left != 0, true));
            }
        }
        else
        {
            // faster unrolled loop to iterate over 8 characters
            while (_expect(buffer.length - i >= 8, true))
            {
                static foreach (_; 0..8)
                {
                    if (_expect(!charMap[buffer[i]], false)) goto FOUND;
                    ++i;
                }
            }
        }

        // handle the rest
        if (_expect(i >= buffer.length, false)) return err(ParserError.partial);

        FOUND:
        while (true)
        {
            static if (is(typeof(next) == char)) {
                static assert(!charMap[next], "Next character is not in ranges");
                if (buffer[i] == next) return 0;
            } else {
                static assert(next.length > 0, "Next character not provided");
                static foreach (c; next) {
                    static assert(!charMap[c], "Next character is not in ranges");
                    if (buffer[i] == c) return 0;
                }
            }
            if (_expect(!charMap[buffer[i]], false)) return err(ParserError.token);
            if (_expect(++i == buffer.length, false)) return err(ParserError.partial);
        }
    }

    // advances over new line
    enum advanceNewline = q{
            assert(i < buffer.length);
            if (_expect(buffer[i] == '\r', true))
            {
                if (_expect(i+1 == buffer.length, false)) return err(ParserError.partial);
                if (_expect(buffer[i+1] != '\n', false)) return err(ParserError.newLine);
                i += 2;
            }
            else if (buffer[i] == '\n') ++i;
            else assert(0);
        };

    // skips over spaces in the buffer
    template skipSpaces(ParserError err)
    {
        enum skipSpaces = `
            do {
                ++i;
                if (_expect(buffer.length == i, false)) return err(ParserError.partial);
                if (_expect(buffer[i] == '\r' || buffer[i] == '\n', false)) return err(` ~ err.stringof ~ `);
            } while (buffer[i] == ' ');
        `;
    }
}

///
@("example")
unittest
{
    // init parser
    auto reqParser = initParser!Msg(); // or `MsgParser!MSG reqParser;`
    auto resParser = initParser!Msg(); // or `MsgParser!MSG resParser;`

    // parse request
    string data = "GET /foo HTTP/1.1\r\nHost: 127.0.0.1:8090\r\n\r\n";
    // returns parsed message header length when parsed successfully, -ParserError on error
    int res = reqParser.parseRequest(data);
    assert(res == data.length);
    assert(reqParser.method == "GET");
    assert(reqParser.uri == "/foo");
    assert(reqParser.minorVer == 1); // HTTP/1.1
    assert(reqParser.headers.length == 1);
    assert(reqParser.headers[0].name == "Host");
    assert(reqParser.headers[0].value == "127.0.0.1:8090");

    // parse response
    data = "HTTP/1.0 200 OK\r\n";
    uint lastPos; // store last parsed position for next run
    res = resParser.parseResponse(data, lastPos);
    assert(res == -ParserError.partial); // no complete message header yet
    data = "HTTP/1.0 200 OK\r\nContent-Type: text/plain\r\nContent-Length: 3\r\n\r\nfoo";
    res = resParser.parseResponse(data, lastPos); // starts parsing from previous position
    assert(res == data.length - 3); // whole message header parsed, body left to be handled based on actual header values
    assert(resParser.minorVer == 0); // HTTP/1.0
    assert(resParser.status == 200);
    assert(resParser.statusMsg == "OK");
    assert(resParser.headers.length == 2);
    assert(resParser.headers[0].name == "Content-Type");
    assert(resParser.headers[0].value == "text/plain");
    assert(resParser.headers[1].name == "Content-Length");
    assert(resParser.headers[1].value == "3");
}

/**
 * Parses HTTP version from a slice returned in `onVersion` callback.
 *
 * Returns: minor version (0 for HTTP/1.0 or 1 for HTTP/1.1) on success or
 *          `-ParserError.invalidVersion` on error
 */
int parseHttpVersion(const(char)[] ver) pure
{
    if (_expect(ver.length != 8, false)) return err(ParserError.invalidVersion);

    static foreach (i, c; "HTTP/1.")
        if (_expect(ver[i] != c, false)) return err(ParserError.invalidVersion);

    if (_expect(ver[7] < '0' || ver[7] > '9', false)) return err(ParserError.invalidVersion);
    return ver[7] - '0';
}

@("parseHttpVersion")
unittest
{
    assert(parseHttpVersion("FOO") < 0);
    assert(parseHttpVersion("HTTP/1.") < 0);
    assert(parseHttpVersion("HTTP/1.12") < 0);
    assert(parseHttpVersion("HTTP/1.a") < 0);
    assert(parseHttpVersion("HTTP/2.0") < 0);
    assert(parseHttpVersion("HTTP/1.00") < 0);
    assert(parseHttpVersion("HTTP/1.0") == 0);
    assert(parseHttpVersion("HTTP/1.1") == 1);
}

version (CI_MAIN)
{
    // workaround for dub not supporting unittests with betterC
    version (D_BetterC)
    {
        extern(C) void main() {
            import core.stdc.stdio;
            static foreach(u; __traits(getUnitTests, httparsed)) {
                debug printf("testing '" ~ __traits(getAttributes, u)[0] ~ "'\n");
                u();
            }
            debug printf("All unit tests have been run successfully.\n");
        }
    }
    else
    {
        void main()
        {
            version (unittest) {} // run automagically
            else
            {
                import core.stdc.stdio;

                // just a compilation test
                auto reqParser = initParser!Msg();
                auto resParser = initParser!Msg();

                string data = "GET /foo HTTP/1.1\r\nHost: 127.0.0.1:8090\r\n\r\n";
                int res = reqParser.parseRequest(data);
                assert(res == data.length);

                data = "HTTP/1.0 200 OK\r\nContent-Type: text/plain\r\nContent-Length: 3\r\n\r\nfoo";
                res = resParser.parseResponse(data);
                assert(res == data.length - 3);
                () @trusted { printf("Test app works\n"); }();
            }
        }
    }
}

private:

/// Converts the error code to the negative value returned from the parser functions
int err(ParserError e) pure { pragma(inline, true); return -(cast(int)e); }

/// Builds valid char map from the provided ranges of invalid ones
bool[256] buildValidCharMap()(string invalidRanges)
{
    assert(invalidRanges.length % 2 == 0, "Uneven ranges");
    bool[256] res = true;

    for (int i=0; i < invalidRanges.length; i+=2)
        for (int j=invalidRanges[i]; j <= invalidRanges[i+1]; ++j)
            res[j] = false;
    return res;
}

@("buildValidCharMap")
unittest
{
    string ranges = "\0 \"\"(),,//:@[]{{}}\x7f\xff";
    assert(buildValidCharMap(ranges) ==
        cast(bool[])[
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
            0,1,0,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
            0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,1,
            1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        ]);
}

version (unittest) version = WITH_MSG;
else version (CI_MAIN) version = WITH_MSG;

version (WITH_MSG)
{
    // define our message content handler
    struct Header
    {
        const(char)[] name;
        const(char)[] value;
    }

    // Just store slices of parsed message header
    struct Msg
    {
        @safe pure nothrow @nogc:
        void onMethod(const(char)[] method) { this.method = method; }
        void onUri(const(char)[] uri) { this.uri = uri; }
        int onVersion(const(char)[] ver)
        {
            minorVer = parseHttpVersion(ver);
            return minorVer >= 0 ? 0 : minorVer;
        }
        void onHeader(const(char)[] name, const(char)[] value) {
            this.m_headers[m_headersLength].name = name;
            this.m_headers[m_headersLength++].value = value;
        }
        void onStatus(int status) { this.status = status; }
        void onStatusMsg(const(char)[] statusMsg) { this.statusMsg = statusMsg; }

        const(char)[] method;
        const(char)[] uri;
        int minorVer;
        int status;
        const(char)[] statusMsg;

        private {
            Header[32] m_headers;
            size_t m_headersLength;
        }

        Header[] headers() return { return m_headers[0..m_headersLength]; }
    }

    enum Test { err, complete, partial }
}

// Tests from https://github.com/h2o/picohttpparser/blob/master/test.c

@("Request")
unittest
{
    auto parse(string data, Test test = Test.complete, int additional = 0) @safe nothrow @nogc
    {
        auto parser = initParser!Msg();
        auto res = parser.parseRequest(data);
        // if (res < 0) writeln("Err: ", cast(ParserError)(-res));
        final switch (test)
        {
            case Test.err: assert(res < -ParserError.partial); break;
            case Test.partial: assert(res == -ParserError.partial); break;
            case Test.complete: assert(res == data.length - additional); break;
        }

        return parser.msg;
    }

    // simple
    {
        auto req = parse("GET / HTTP/1.0\r\n\r\n");
        assert(req.headers.length == 0);
        assert(req.method == "GET");
        assert(req.uri == "/");
        assert(req.minorVer == 0);
    }

    // parse headers
    {
        auto req = parse("GET /hoge HTTP/1.1\r\nHost: example.com\r\nCookie: \r\n\r\n");
        assert(req.method == "GET");
        assert(req.uri == "/hoge");
        assert(req.minorVer == 1);
        assert(req.headers.length == 2);
        assert(req.headers[0] == Header("Host", "example.com"));
        assert(req.headers[1] == Header("Cookie", ""));
    }

    // multibyte included
    {
        auto req = parse("GET /hoge HTTP/1.1\r\nHost: example.com\r\nUser-Agent: \343\201\262\343/1.0\r\n\r\n");
        assert(req.method == "GET");
        assert(req.uri == "/hoge");
        assert(req.minorVer == 1);
        assert(req.headers.length == 2);
        assert(req.headers[0] == Header("Host", "example.com"));
        assert(req.headers[1] == Header("User-Agent", "\343\201\262\343/1.0"));
    }

    //multiline
    {
        auto req = parse("GET / HTTP/1.0\r\nfoo: \r\nfoo: b\r\n \tc\r\n\r\n");
        assert(req.method == "GET");
        assert(req.uri == "/");
        assert(req.minorVer == 0);
        assert(req.headers.length == 3);
        assert(req.headers[0] == Header("foo", ""));
        assert(req.headers[1] == Header("foo", "b"));
        assert(req.headers[2] == Header(null, " \tc"));
    }

    // header name with trailing space
    parse("GET / HTTP/1.0\r\nfoo : ab\r\n\r\n", Test.err);

    // incomplete
    assert(parse("\r", Test.partial).method == null);
    assert(parse("\r\n", Test.partial).method == null);
    assert(parse("\r\nGET", Test.partial).method == null);
    assert(parse("GET", Test.partial).method == null);
    assert(parse("GET ", Test.partial).method == "GET");
    assert(parse("GET /", Test.partial).uri == null);
    assert(parse("GET / ", Test.partial).uri == "/");
    assert(parse("GET / HTTP/1.1", Test.partial).minorVer == 0);
    assert(parse("GET / HTTP/1.1\r", Test.partial).minorVer == 1);
    assert(parse("GET / HTTP/1.1\r\n", Test.partial).minorVer == 1);
    parse("GET / HTTP/1.0\r\n\r", Test.partial);
    parse("GET / HTTP/1.0\r\n\r\n", Test.complete);
    parse(" / HTTP/1.0\r\n\r\n", Test.err); // empty method
    parse("GET HTTP/1.0\r\n\r\n", Test.err); // empty request target
    parse("GET / \r\n\r\n", Test.err); // empty version
    parse("GET / HTTP/1.0\r\n:a\r\n\r\n", Test.err); // empty header name
    parse("GET / HTTP/1.0\r\n :a\r\n\r\n", Test.err); // empty header name (space only)
    parse("G\0T / HTTP/1.0\r\n\r\n", Test.err); // NUL in method
    parse("G\tT / HTTP/1.0\r\n\r\n", Test.err); // tab in method
    parse("GET /\x7f HTTP/1.0\r\n\r\n", Test.err); // DEL in uri
    parse("GET / HTTP/1.0\r\na\0b: c\r\n\r\n", Test.err); // NUL in header name
    parse("GET / HTTP/1.0\r\nab: c\0d\r\n\r\n", Test.err); // NUL in header value
    parse("GET / HTTP/1.0\r\na\033b: c\r\n\r\n", Test.err); // CTL in header name
    parse("GET / HTTP/1.0\r\nab: c\033\r\n\r\n", Test.err); // CTL in header value
    parse("GET / HTTP/1.0\r\n/: 1\r\n\r\n", Test.err); // invalid char in header value
    parse("GET / HTTP/1.0\r\n\r\n", Test.complete); // multiple spaces between tokens

    // accept MSB chars
    {
        auto res = parse("GET /\xa0 HTTP/1.0\r\nh: c\xa2y\r\n\r\n");
        assert(res.method == "GET");
        assert(res.uri == "/\xa0");
        assert(res.minorVer == 0);
        assert(res.headers.length == 1);
        assert(res.headers[0] == Header("h", "c\xa2y"));
    }

    parse("GET / HTTP/1.0\r\n\x7b: 1\r\n\r\n", Test.err); // disallow '{'

    // exclude leading and trailing spaces in header value
    {
        auto req = parse("GET / HTTP/1.0\r\nfoo: a \t \r\n\r\n");
        assert(req.headers[0].value == "a");
    }

    // leave the body intact
    parse("GET / HTTP/1.0\r\n\r\nfoo bar baz", Test.complete, "foo bar baz".length);

    // realworld
    {
        auto req = parse("GET /cookies HTTP/1.1\r\nHost: 127.0.0.1:8090\r\nConnection: keep-alive\r\nCache-Control: max-age=0\r\nAccept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\nUser-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.56 Safari/537.17\r\nAccept-Encoding: gzip,deflate,sdch\r\nAccept-Language: en-US,en;q=0.8\r\nAccept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.3\r\nCookie: name=wookie\r\n\r\n");
        assert(req.method == "GET");
        assert(req.uri == "/cookies");
        assert(req.minorVer == 1);
        assert(req.headers[0] == Header("Host", "127.0.0.1:8090"));
        assert(req.headers[1] == Header("Connection", "keep-alive"));
        assert(req.headers[2] == Header("Cache-Control", "max-age=0"));
        assert(req.headers[3] == Header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"));
        assert(req.headers[4] == Header("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.56 Safari/537.17"));
        assert(req.headers[5] == Header("Accept-Encoding", "gzip,deflate,sdch"));
        assert(req.headers[6] == Header("Accept-Language", "en-US,en;q=0.8"));
        assert(req.headers[7] == Header("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.3"));
        assert(req.headers[8] == Header("Cookie", "name=wookie"));
    }

    // newline
    {
        auto req = parse("GET / HTTP/1.0\nfoo: a\n\n");
    }
}

@("Response")
// Tests from https://github.com/h2o/picohttpparser/blob/master/test.c
unittest
{
    auto parse(string data, Test test = Test.complete, int additional = 0) @safe nothrow
    {
        auto parser = initParser!Msg();

        auto res = parser.parseResponse(data);
        // if (res < 0) writeln("Err: ", cast(ParserError)(-res));
        final switch (test)
        {
            case Test.err: assert(res < -ParserError.partial); break;
            case Test.partial: assert(res == -ParserError.partial); break;
            case Test.complete: assert(res == data.length - additional); break;
        }

        return parser.msg;
    }

    // simple
    {
        auto res = parse("HTTP/1.0 200 OK\r\n\r\n");
        assert(res.headers.length == 0);
        assert(res.status == 200);
        assert(res.minorVer == 0);
        assert(res.statusMsg == "OK");
    }

    parse("HTTP/1.0 200 OK\r\n\r", Test.partial); // partial

    // parse headers
    {
        auto res = parse("HTTP/1.1 200 OK\r\nHost: example.com\r\nCookie: \r\n\r\n");
        assert(res.headers.length == 2);
        assert(res.minorVer == 1);
        assert(res.status == 200);
        assert(res.statusMsg == "OK");
        assert(res.headers[0] == Header("Host", "example.com"));
        assert(res.headers[1] == Header("Cookie", ""));
    }

    // parse multiline
    {
        auto res = parse("HTTP/1.0 200 OK\r\nfoo: \r\nfoo: b\r\n \tc\r\n\r\n");
        assert(res.headers.length == 3);
        assert(res.minorVer == 0);
        assert(res.status == 200);
        assert(res.statusMsg == "OK");
        assert(res.headers[0] == Header("foo", ""));
        assert(res.headers[1] == Header("foo", "b"));
        assert(res.headers[2] == Header(null, " \tc"));
    }

    // internal server error
    {
        auto res = parse("HTTP/1.0 500 Internal Server Error\r\n\r\n");
        assert(res.headers.length == 0);
        assert(res.minorVer == 0);
        assert(res.status == 500);
        assert(res.statusMsg == "Internal Server Error");
    }

    parse("H", Test.partial); // incomplete 1
    parse("HTTP/1.", Test.partial); // incomplete 2
    assert(parse("HTTP/1.1", Test.partial).minorVer == 0); // incomplete 3 - differs from picohttpparser as we don't parse exact version
    assert(parse("HTTP/1.1 ", Test.partial).minorVer == 1); // incomplete 4
    parse("HTTP/1.1 2", Test.partial); // incomplete 5
    assert(parse("HTTP/1.1 200", Test.partial).status == 0); // incomplete 6
    assert(parse("HTTP/1.1 200 ", Test.partial).status == 200); // incomplete 7
    assert(parse("HTTP/1.1 200\r", Test.partial).status == 200); // incomplete 7.1
    parse("HTTP/1.1 200 O", Test.partial); // incomplete 8
    assert(parse("HTTP/1.1 200 OK\r", Test.partial).statusMsg == "OK"); // incomplete 9 - differs from picohttpparser
    assert(parse("HTTP/1.1 200 OK\r\n", Test.partial).statusMsg == "OK"); // incomplete 10
    assert(parse("HTTP/1.1 200 OK\n", Test.partial).statusMsg == "OK"); // incomplete 11
    assert(parse("HTTP/1.1 200 OK\r\nA: 1\r", Test.partial).headers.length == 0); // incomplete 11
    parse("HTTP/1.1 200 OK\r\n\r\n", Test.complete); // multiple spaces between tokens

    // incomplete 12
    {
        auto res = parse("HTTP/1.1 200 OK\r\nA: 1\r\n", Test.partial);
        assert(res.headers.length == 1);
        assert(res.headers[0] == Header("A", "1"));
    }

    // slowloris (incomplete)
    {
        auto parser = initParser!Msg();
        assert(parser.parseResponse("HTTP/1.0 200 OK\r\n") == -ParserError.partial);
        assert(parser.parseResponse("HTTP/1.0 200 OK\r\n\r") == -ParserError.partial);
        assert(parser.parseResponse("HTTP/1.0 200 OK\r\n\r\nblabla") == "HTTP/1.0 200 OK\r\n\r\n".length);
    }

    parse("HTTP/1. 200 OK\r\n\r\n", Test.err); // invalid http version
    parse("HTTP/1.2z 200 OK\r\n\r\n", Test.err); // invalid http version 2
    parse("HTTP/1.1 OK\r\n\r\n", Test.err); // no status code

    assert(parse("HTTP/1.1 200\r\n\r\n").statusMsg == ""); // accept missing trailing whitespace in status-line
    parse("HTTP/1.1 200X\r\n\r\n", Test.err); // garbage after status 1
    parse("HTTP/1.1 200X \r\n\r\n", Test.err); // garbage after status 2
    parse("HTTP/1.1 200X OK\r\n\r\n", Test.err); // garbage after status 3

    assert(parse("HTTP/1.1 200 OK\r\nbar: \t b\t \t\r\n\r\n").headers[0].value == "b"); // exclude leading and trailing spaces in header value
}

@("Incremental")
unittest
{
    string req = "GET /cookies HTTP/1.1\r\nHost: 127.0.0.1:8090\r\nConnection: keep-alive\r\nCache-Control: max-age=0\r\nAccept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\nUser-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.56 Safari/537.17\r\nAccept-Encoding: gzip,deflate,sdch\r\nAccept-Language: en-US,en;q=0.8\r\nAccept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.3\r\nCookie: name=wookie\r\n\r\n";
    auto parser = initParser!Msg();
    uint parsed;
    auto res = parser.parseRequest(req[0.."GET /cookies HTTP/1.1\r\nHost: 127.0.0.1:8090\r\nConn".length], parsed);
    assert(res == -ParserError.partial);
    assert(parser.msg.method == "GET");
    assert(parser.msg.uri == "/cookies");
    assert(parser.msg.minorVer == 1);
    assert(parser.msg.headers.length == 1);
    assert(parser.msg.headers[0] == Header("Host", "127.0.0.1:8090"));

    res = parser.parseRequest(req, parsed);
    assert(res == req.length);
    assert(parser.msg.method == "GET");
    assert(parser.msg.uri == "/cookies");
    assert(parser.msg.minorVer == 1);
    assert(parser.msg.headers[0] == Header("Host", "127.0.0.1:8090"));
    assert(parser.msg.headers[1] == Header("Connection", "keep-alive"));
    assert(parser.msg.headers[2] == Header("Cache-Control", "max-age=0"));
    assert(parser.msg.headers[3] == Header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"));
    assert(parser.msg.headers[4] == Header("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.56 Safari/537.17"));
    assert(parser.msg.headers[5] == Header("Accept-Encoding", "gzip,deflate,sdch"));
    assert(parser.msg.headers[6] == Header("Accept-Language", "en-US,en;q=0.8"));
    assert(parser.msg.headers[7] == Header("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.3"));
    assert(parser.msg.headers[8] == Header("Cookie", "name=wookie"));
}

@("Incremental - leading empty line")
unittest
{
    // start line is not complete in the first chunk, so the stored position must stay at 0
    // to have the start line parsed again (after the skipped empty line) on the next call
    string req = "\r\nGET /foo HTTP/1.1\r\nHost: 127.0.0.1:8090\r\n\r\n";
    auto parser = initParser!Msg();
    uint parsed;
    auto res = parser.parseRequest(req[0.."\r\nGET".length], parsed);
    assert(res == -ParserError.partial);
    assert(parsed == 0);

    res = parser.parseRequest(req, parsed);
    assert(res == req.length);
    assert(parser.msg.method == "GET");
    assert(parser.msg.uri == "/foo");
    assert(parser.msg.minorVer == 1);
    assert(parser.msg.headers.length == 1);
    assert(parser.msg.headers[0] == Header("Host", "127.0.0.1:8090"));
}

//** used intrinsics **//

version(LDC)
{
    public import core.simd;
    public import ldc.intrinsics;
    import ldc.gccbuiltins_x86;

    enum LDC_with_SSE42 = __traits(targetHasFeature, "sse4.2");

    // These specify the type of data that we're comparing.
    enum _SIDD_UBYTE_OPS = 0x00;
    enum _SIDD_UWORD_OPS = 0x01;
    enum _SIDD_SBYTE_OPS = 0x02;
    enum _SIDD_SWORD_OPS = 0x03;

    // These specify the type of comparison operation.
    enum _SIDD_CMP_EQUAL_ANY = 0x00;
    enum _SIDD_CMP_RANGES = 0x04;
    enum _SIDD_CMP_EQUAL_EACH = 0x08;
    enum _SIDD_CMP_EQUAL_ORDERED = 0x0c;

    // These are used in _mm_cmpXstri() to specify the return.
    enum _SIDD_LEAST_SIGNIFICANT = 0x00;
    enum _SIDD_MOST_SIGNIFICANT = 0x40;

    // These macros are used in _mm_cmpXstri() to specify the return.
    enum _SIDD_BIT_MASK = 0x00;
    enum _SIDD_UNIT_MASK = 0x40;

    // some definition aliases to commonly used names
    alias __m128i = int4;

    // some used methods aliases
    alias _expect = llvm_expect;
    alias _mm_loadu_si128 = loadUnaligned!__m128i;
    alias _mm_cmpestri = __builtin_ia32_pcmpestri128;
}
else
{
    enum LDC_with_SSE42 = false;

    // fallback that just returns the value (no branch prediction hint available)
    T _expect(T)(T val, T expected_val) if (__traits(isIntegral, T))
    {
        pragma(inline, true);
        return val;
    }
}

pragma(msg, "SSE: ", LDC_with_SSE42);