filterlib.php 8.75 KB
Newer Older
moodler's avatar
moodler committed
1
2
<?php // $Id$
      // Contains special functions that are particularly useful to filters
3

moodler's avatar
moodler committed
4
5
6
7
8
9

/**
 * This is just a little object to define a phrase and some instructions
 * for how to process it.  Filters can create an array of these to pass
 * to the filter_phrases function below.
 *
 * Properties:
 *   phrase         the text to look for
 *   hreftagbegin   markup inserted immediately before each match
 *   hreftagend     markup inserted immediately after each match
 *   casesensitive  true to match the phrase case sensitively
 *   fullmatch      true to skip matches directly preceded or followed by
 *                  a letter or digit
 **/
class filterobject {
    public $phrase;
    public $hreftagbegin;
    public $hreftagend;
    public $casesensitive;
    public $fullmatch;

    /**
     * Constructor.
     *
     * @param string $phrase        the text to look for
     * @param string $hreftagbegin  markup inserted before each match
     * @param string $hreftagend    markup inserted after each match
     * @param bool   $casesensitive true to match case sensitively
     * @param bool   $fullmatch     true to reject matches glued to letters/digits
     */
    public function __construct($phrase, $hreftagbegin='<span class="highlight">',
                                         $hreftagend='</span>',
                                         $casesensitive=false,
                                         $fullmatch=false) {

        $this->phrase        = $phrase;
        $this->hreftagbegin  = $hreftagbegin;
        $this->hreftagend    = $hreftagend;
        $this->casesensitive = $casesensitive;
        $this->fullmatch     = $fullmatch;
    }

    /**
     * Old (PHP 4) style constructor.
     *
     * A method named after its class is no longer a constructor in PHP 8,
     * so the real work lives in __construct().  This method is kept only so
     * that code calling it explicitly (for example a subclass using
     * parent::filterobject(...)) continues to work.
     */
    public function filterobject($phrase, $hreftagbegin='<span class="highlight">',
                                          $hreftagend='</span>',
                                          $casesensitive=false,
                                          $fullmatch=false) {

        $this->__construct($phrase, $hreftagbegin, $hreftagend, $casesensitive, $fullmatch);
    }
}


/**
 * Process phrases intelligently found within a HTML text (such as adding links)
 *
 * The text is processed in stages:
 *   1. everything enclosed by the "ignore" tag pairs is swapped for placeholders
 *   2. every remaining tag is swapped for a placeholder, so phrases are never
 *      matched inside markup
 *   3. each phrase is wrapped in its begin/end markup, and the markup just
 *      added is itself swapped for placeholders so later phrases cannot
 *      match inside it
 *   4. all placeholders are expanded again, in reverse order
 *
 * The objects in $link_array are only read, never modified, so the same array
 * can safely be passed to this function several times.
 *
 * @param string $text             the text that we are filtering
 * @param array  $link_array       an array of filterobjects (any object with a
 *                                 'phrase' property will do; missing properties
 *                                 get defaults)
 * @param array  $ignoretagsopen   an array of opening tags that we should ignore while
 *                                 filtering (preg syntax allowed).  NULL means "ignore
 *                                 the contents of <a> tags".
 * @param array  $ignoretagsclose  an array of corresponding closing tags
 * @return string the filtered text
 **/
function filter_phrases ($text, $link_array, $ignoretagsopen=NULL, $ignoretagsclose=NULL) {

    global $CFG;

/// A list of open/close tags that we should not replace within
/// No reason why you can't put full preg expressions in here too
/// eg '<script(.+?)>' to match any type of script tag
    $filterignoretagsopen  = array('<head>' , '<nolink>' , '<span class="nolink">');
    $filterignoretagsclose = array('</head>', '</nolink>', '</span>');

/// Invalid prefixes and suffixes for the fullmatch searches
    $filterinvalidprefixes = '([a-zA-Z0-9])';
    $filterinvalidsuffixes = '([a-zA-Z0-9])';


/// Add the user defined ignore tags to the default list
/// Unless specified otherwise, we will not replace within <a></a> tags
    if ( $ignoretagsopen === NULL ) {
        $ignoretagsopen  = array('<a[^>]+?>');
        $ignoretagsclose = array('</a>');
    }

/// Both lists are needed: an opening tag without its closing tag can not
/// describe a region to ignore
    if ( is_array($ignoretagsopen) && is_array($ignoretagsclose) ) {
        foreach ($ignoretagsopen as $open) {
            $filterignoretagsopen[] = $open;
        }
        foreach ($ignoretagsclose as $close) {
            $filterignoretagsclose[] = $close;
        }
    }


/// Remove everything enclosed by the ignore tags from $text
    $ignoretags = array();
    foreach ($filterignoretagsopen as $ikey=>$opentag) {
        $closetag = $filterignoretagsclose[$ikey];
    /// form regular expression
        $opentag  = str_replace('/','\/',$opentag); // delimit forward slashes
        $closetag = str_replace('/','\/',$closetag); // delimit forward slashes
        $pregexp = '/'.$opentag.'(.+?)'.$closetag.'/is';

        preg_match_all($pregexp, $text, $list_of_ignores);
        foreach (array_unique($list_of_ignores[0]) as $key=>$value) {
            $ignoretags['<#'.$ikey.'.'.$key.'#>'] = $value;
        }
        if (!empty($ignoretags)) {
            $text = str_replace($ignoretags,array_keys($ignoretags),$text);
        }
    }


/// Remove tags from $text (the <#...#> placeholders created above are skipped)
    $tags = array();
    preg_match_all('/<[^\#](.*?)>/is',$text,$list_of_tags);
    foreach (array_unique($list_of_tags[0]) as $key=>$value) {
        $tags['<|'.$key.'|>'] = $value;
    }
    if (!empty($tags)) {
        $text = str_replace($tags,array_keys($tags),$text);
    }


/// One entry per processed phrase: placeholder => markup hidden after that phrase
    $newtagsarray = array();

/// Time to cycle through each phrase to be linked
    foreach ($link_array as $linkobject) {

    /// Nothing to do without a phrase
        if (!isset($linkobject->phrase)) {
            continue;
        }

    /// Work on local copies so the caller's objects are left untouched.
    /// Properties may be missing if the filterobject class has not been used
    /// to construct the object, in which case defaults are used.
        $phrase = $linkobject->phrase;
        if (isset($linkobject->hreftagbegin) && isset($linkobject->hreftagend)) {
            $hreftagbegin = $linkobject->hreftagbegin;
            $hreftagend   = $linkobject->hreftagend;
        } else {
            $hreftagbegin = '<span class="highlight">';
            $hreftagend   = '</span>';
        }
        $casesensitive = !empty($linkobject->casesensitive);
        $fullmatch     = !empty($linkobject->fullmatch);


    /// Avoid integers < 1000 to be linked. See bug 1446.
        $intcurrent = intval($phrase);
        if (!empty($intcurrent) && strval($intcurrent) == $phrase && $intcurrent < 1000) {
            continue;
        }


    /// Strip tags out of the phrase
        $phrase = strip_tags($phrase);

    /// An empty phrase would build a pattern that matches at every position
        if ($phrase === '') {
            continue;
        }

    /// Quote any regular expression characters and the delimiter
        $phrase = preg_quote($phrase, '/');

    /// If $CFG->filtermatchoneperpage, avoid previously (request) linked phrases
        if (!empty($CFG->filtermatchoneperpage)) {
            if (!empty($_REQUEST['LINKEDPHRASES']) && in_array($phrase,$_REQUEST['LINKEDPHRASES'])) {
                continue;
            }
        }

    /// Regular expression modifiers
        $modifiers = ($casesensitive) ? 's' : 'is';

    /// Do we need to do a fullmatch?
    /// If yes then go through and hide any non full matching entries
        $notfullmatches = array();
        if ($fullmatch) {
            $regexp = '/'.$filterinvalidprefixes.'('.$phrase.')|('.$phrase.')'.$filterinvalidsuffixes.'/'.$modifiers;

            preg_match_all($regexp,$text,$list_of_notfullmatches);

            if ($list_of_notfullmatches) {
                foreach (array_unique($list_of_notfullmatches[0]) as $key=>$value) {
                    $notfullmatches['<*'.$key.'*>'] = $value;
                }
                if (!empty($notfullmatches)) {
                    $text = str_replace($notfullmatches,array_keys($notfullmatches),$text);
                }
            }
        }

    /// Finally we do our highlighting
        if (!empty($CFG->filtermatchonepertext) || !empty($CFG->filtermatchoneperpage)) {
            $resulttext = preg_replace('/('.$phrase.')/'.$modifiers,
                                      $hreftagbegin.'$1'.$hreftagend, $text, 1);
        } else {
            $resulttext = preg_replace('/('.$phrase.')/'.$modifiers,
                                      $hreftagbegin.'$1'.$hreftagend, $text);
        }

    /// preg_replace returns NULL on a PCRE error; keep the text we had
    /// rather than throwing the whole content away
        if ($resulttext === NULL) {
            $resulttext = $text;
        }

    /// If $CFG->filtermatchoneperpage, save linked phrases to request
        if (!empty($CFG->filtermatchoneperpage)) {
            if ($resulttext != $text) { //Texts are different so we have linked the phrase
                $_REQUEST['LINKEDPHRASES'][] = $phrase;
            }
        }

    /// Set $text to $resulttext
        $text = $resulttext;

    /// Replace the not full matches before cycling to next link object
        if (!empty($notfullmatches)) {
            $text = str_replace(array_keys($notfullmatches),$notfullmatches,$text);
        }


    /// We need to remove any tags we've just added, so that the next phrase
    /// is not matched inside them.  This also swallows placeholders created
    /// earlier, which is why they are expanded in reverse order below.
        $newtagsprefix = (string)(count($newtagsarray) + 1);
        $newtags = array();
        preg_match_all('/<(.+?)>/is',$text,$list_of_newtags);
        foreach (array_unique($list_of_newtags[0]) as $ntkey=>$value) {
            $newtags['<%'.$newtagsprefix.'.'.$ntkey.'%>'] = $value;
        }
        if (!empty($newtags)) {
            $text = str_replace($newtags,array_keys($newtags),$text);
            $newtagsarray[] = $newtags;
        }
    }


/// Rebuild the text with all the excluded areas

    if (!empty($newtagsarray)) {
        $newtagsarray = array_reverse($newtagsarray, true);
        foreach ($newtagsarray as $newtags) {
            $text = str_replace(array_keys($newtags), $newtags, $text);
        }
    }

    if (!empty($tags)) {
        $text = str_replace(array_keys($tags),$tags,$text);
    }
    if (!empty($ignoretags)) {
        $text = str_replace(array_keys($ignoretags),$ignoretags,$text);
    }

    return $text;

}

225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250


/**
 * Remove filterobjects whose phrase has already been seen earlier in the list.
 *
 * A case sensitive object is a duplicate only if exactly the same phrase was
 * kept before; a case insensitive object is a duplicate if any previously
 * kept phrase is equal to it once both are lower-cased.  Phrases are compared
 * as exact strings, so numeric look-alikes such as "1e3" and "1000" remain
 * distinct.
 *
 * @param array $linkarray an array of filterobjects (objects with 'phrase'
 *                         and 'casesensitive' properties)
 * @return array the objects that were kept, in their original order and
 *               re-indexed from zero
 */
function filter_remove_duplicates($linkarray) {

    $concepts  = array(); // set of kept phrases, keyed by the exact phrase
    $lconcepts = array(); // the same set keyed by lower-cased phrase, for case insensitive checks

    $cleanlinks = array();

    foreach ($linkarray as $filterobject) {
        $phrase  = (string)$filterobject->phrase;
        $lphrase = strtolower($phrase);

        if ($filterobject->casesensitive) {
            $exists = isset($concepts[$phrase]);
        } else {
            $exists = isset($lconcepts[$lphrase]);
        }

        if (!$exists) {
            $cleanlinks[] = $filterobject;
            $concepts[$phrase]   = true;
            $lconcepts[$lphrase] = true;
        }
    }

    return $cleanlinks;
}

251
?>