1 | /* Byte-wise substring search, using the Two-Way algorithm. |
---|
2 | * Copyright (C) 2008 Eric Blake |
---|
3 | * Permission to use, copy, modify, and distribute this software |
---|
4 | * is freely granted, provided that this notice is preserved. |
---|
5 | */ |
---|
6 | |
---|
7 | /* |
---|
8 | FUNCTION |
---|
9 | <<memmem>>---find memory segment |
---|
10 | |
---|
11 | INDEX |
---|
12 | memmem |
---|
13 | |
---|
14 | SYNOPSIS |
---|
15 | #include <string.h> |
---|
16 | char *memmem(const void *<[s1]>, size_t <[l1]>, const void *<[s2]>, |
---|
17 | size_t <[l2]>); |
---|
18 | |
---|
19 | DESCRIPTION |
---|
20 | |
---|
21 | Locates the first occurrence in the memory region pointed to |
---|
22 | by <[s1]> with length <[l1]> of the sequence of bytes pointed |
---|
23 | to by <[s2]> of length <[l2]>. If you already know the |
---|
24 | lengths of your haystack and needle, <<memmem>> can be much |
---|
25 | faster than <<strstr>>. |
---|
26 | |
---|
27 | RETURNS |
---|
28 | Returns a pointer to the located segment, or a null pointer if |
---|
29 | <[s2]> is not found. If <[l2]> is 0, <[s1]> is returned. |
---|
30 | |
---|
31 | PORTABILITY |
---|
32 | <<memmem>> is a newlib extension. |
---|
33 | |
---|
34 | <<memmem>> requires no supporting OS subroutines. |
---|
35 | |
---|
36 | QUICKREF |
---|
37 | memmem pure |
---|
38 | */ |
---|
39 | |
---|
40 | #include <string.h> |
---|
41 | |
---|
42 | #if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__) |
---|
43 | # define RETURN_TYPE void * |
---|
44 | # define AVAILABLE(h, h_l, j, n_l) ((j) <= (h_l) - (n_l)) |
---|
45 | # include "str-two-way.h" |
---|
46 | #endif |
---|
47 | |
---|
48 | void * |
---|
49 | memmem (const void *haystack_start, |
---|
50 | size_t haystack_len, |
---|
51 | const void *needle_start, |
---|
52 | size_t needle_len) |
---|
53 | { |
---|
54 | /* Abstract memory is considered to be an array of 'unsigned char' values, |
---|
55 | not an array of 'char' values. See ISO C 99 section 6.2.6.1. */ |
---|
56 | const unsigned char *haystack = (const unsigned char *) haystack_start; |
---|
57 | const unsigned char *needle = (const unsigned char *) needle_start; |
---|
58 | |
---|
59 | if (needle_len == 0) |
---|
60 | /* The first occurrence of the empty string is deemed to occur at |
---|
61 | the beginning of the string. */ |
---|
62 | return (void *) haystack; |
---|
63 | |
---|
64 | #if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__) |
---|
65 | |
---|
66 | /* Less code size, but quadratic performance in the worst case. */ |
---|
67 | while (needle_len <= haystack_len) |
---|
68 | { |
---|
69 | if (!memcmp (haystack, needle, needle_len)) |
---|
70 | return (void *) haystack; |
---|
71 | haystack++; |
---|
72 | haystack_len--; |
---|
73 | } |
---|
74 | return NULL; |
---|
75 | |
---|
76 | #else /* compilation for speed */ |
---|
77 | |
---|
78 | /* Larger code size, but guaranteed linear performance. */ |
---|
79 | |
---|
80 | /* Sanity check, otherwise the loop might search through the whole |
---|
81 | memory. */ |
---|
82 | if (haystack_len < needle_len) |
---|
83 | return NULL; |
---|
84 | |
---|
85 | /* Use optimizations in memchr when possible, to reduce the search |
---|
86 | size of haystack using a linear algorithm with a smaller |
---|
87 | coefficient. However, avoid memchr for long needles, since we |
---|
88 | can often achieve sublinear performance. */ |
---|
89 | if (needle_len < LONG_NEEDLE_THRESHOLD) |
---|
90 | { |
---|
91 | haystack = memchr (haystack, *needle, haystack_len); |
---|
92 | if (!haystack || needle_len == 1) |
---|
93 | return (void *) haystack; |
---|
94 | haystack_len -= haystack - (const unsigned char *) haystack_start; |
---|
95 | if (haystack_len < needle_len) |
---|
96 | return NULL; |
---|
97 | return two_way_short_needle (haystack, haystack_len, needle, needle_len); |
---|
98 | } |
---|
99 | return two_way_long_needle (haystack, haystack_len, needle, needle_len); |
---|
100 | #endif /* compilation for speed */ |
---|
101 | } |
---|