2 Copyright (c) 2003-2011, CKSource - Frederico Knabben. All rights reserved.
3 For licensing, see LICENSE.html or http://ckeditor.com/license
// Prototypes of the parser node types; the helper methods defined below are
// attached to these so every parsed fragment/element gets them.
var fragmentPrototype = CKEDITOR.htmlParser.fragment.prototype;
var elementPrototype = CKEDITOR.htmlParser.element.prototype;
/**
 * Returns the node's single child.
 * @returns The first child when the node has exactly one (truthy) child,
 * otherwise null.
 */
fragmentPrototype.onlyChild = elementPrototype.onlyChild = function()
{
	var nodes = this.children;

	if ( nodes.length == 1 && nodes[ 0 ] )
		return nodes[ 0 ];

	return null;
};
/**
 * Detaches every descendant element named tagName from this element's subtree.
 * @param tagName Element name to look for.
 * @returns {Array} The removed elements, in document order.
 */
elementPrototype.removeAnyChildWithName = function( tagName )
{
	var siblings = this.children,
		removed = [];

	for ( var idx = 0; idx < siblings.length; idx++ )
	{
		var node = siblings[ idx ];

		// Nodes without a name (e.g. text) have no children to search.
		if ( !node.name )
			continue;

		if ( node.name == tagName )
		{
			removed.push( node );
			// Step back so the node that slides into this slot is visited too.
			siblings.splice( idx--, 1 );
		}

		// Search below the node as well, even when the node itself was just removed.
		removed = removed.concat( node.removeAnyChildWithName( tagName ) );
	}

	return removed;
};
/**
 * Walks up the parent chain looking for a named ancestor.
 * @param tagNameRegex Pattern (RegExp or string) handed to String#match
 * against each ancestor's name.
 * @returns The closest matching ancestor, or the falsy end of the parent
 * chain when nothing matches.
 */
elementPrototype.getAncestor = function( tagNameRegex )
{
	for ( var ancestor = this.parent; ancestor; ancestor = ancestor.parent )
	{
		if ( ancestor.name && ancestor.name.match( tagNameRegex ) )
			break;
	}

	return ancestor;
};
/**
 * Depth-first search for the first descendant accepted by the evaluator.
 * @param {Function} evaluator Called with each node; a truthy result selects it.
 * @returns The first accepted node, or null when none is accepted.
 */
fragmentPrototype.firstChild = elementPrototype.firstChild = function( evaluator )
{
	for ( var idx = 0 ; idx < this.children.length ; idx++ )
	{
		var node = this.children[ idx ];

		if ( evaluator( node ) )
			return node;

		// Only named nodes (elements) can be searched further.
		if ( !node.name )
			continue;

		var nested = node.firstChild( evaluator );
		if ( nested )
			return nested;
	}

	return null;
};
// Adding a (set) of styles to the element's 'style' attributes.
/**
 * Accepted call forms:
 *   addStyle( name, value, isPrepend )  - a single name/value pair (value is a string);
 *   addStyle( styles, isPrepend )       - an object whose own properties are name/value pairs;
 *   addStyle( styleText, isPrepend )    - raw style text.
 * In the two short forms the second argument is taken as the prepend flag.
 * When isPrepend is truthy the new styles are placed before the existing ones.
 */
elementPrototype.addStyle = function( name, value, isPrepend )
{
	var styleText, addingStyleText = '';

	// name/value pair.
	if ( typeof value == 'string' )
		addingStyleText += name + ':' + value + ';';
	else
	{
		// style literal.
		if ( typeof name == 'object' )
		{
			for ( var style in name )
			{
				if ( name.hasOwnProperty( style ) )
					addingStyleText += style + ':' + name[ style ] + ';';
			}
		}
		// raw style text form.
		else
			addingStyleText += name;

		// Short call form: the second argument carries the prepend flag.
		isPrepend = value;
	}

	if ( !this.attributes )
		this.attributes = {};

	styleText = this.attributes.style || '';

	styleText = ( isPrepend ?
	              [ addingStyleText, styleText ]
	              : [ styleText, addingStyleText ] ).join( ';' );

	// Drop leading and doubled separators left over by the join. The pattern
	// is global so that every stray ';' is removed, not only the first one.
	this.attributes.style = styleText.replace( /^;+|;(?=;)/g, '' );
};
/**
 * Return the DTD-valid parent tag names of the specified one.
 * @param tagName Name of the child tag.
 * @returns {Object} A map whose keys are the names of the DTD entries
 * (entries containing '$' are skipped) that list tagName as a child.
 */
CKEDITOR.dtd.parentOf = function( tagName )
{
	var parents = {};

	for ( var candidate in this )
	{
		if ( candidate.indexOf( '$' ) != -1 )
			continue;

		if ( this[ candidate ][ tagName ] )
			parents[ candidate ] = 1;
	}

	return parents;
};
// 1. move consistent list item styles up to list root.
// 2. clear out unnecessary list item numbering.
/**
 * @param list The list element to tidy up. Nothing is done when the list
 * already carries a 'list-style-type' in its own style attribute.
 */
function postProcessList( list )
{
	var listStylePattern = /list-style-type:(.*?)(?:;|$)/,
		filterStyles = CKEDITOR.plugins.pastefromword.filters.stylesFilter,
		items = list.children,
		total = list.children.length,
		sharedType,
		found,
		itemAttrs,
		idx;

	if ( listStylePattern.exec( list.attributes.style ) )
		return;

	for ( idx = 0; idx < total; idx++ )
	{
		itemAttrs = items[ idx ].attributes;

		// A 'value' equal to the item's natural position is redundant.
		if ( itemAttrs.value && Number( itemAttrs.value ) == idx + 1 )
			delete itemAttrs.value;

		found = listStylePattern.exec( itemAttrs.style );
		if ( !found )
			continue;

		// Items disagree on the style type: nothing can be moved to the root.
		if ( sharedType && found[ 1 ] != sharedType )
		{
			sharedType = null;
			break;
		}

		sharedType = found[ 1 ];
	}

	if ( !sharedType )
		return;

	// Strip the per-item declaration and put the shared one on the list itself.
	for ( idx = 0; idx < total; idx++ )
	{
		itemAttrs = items[ idx ].attributes;
		if ( itemAttrs.style )
			itemAttrs.style = filterStyles( [ [ 'list-style-type'] ] )( itemAttrs.style ) || '';
	}

	list.addStyle( 'list-style-type', sharedType );
}
// Matches a leading CSS length: an optional number followed by a unit
// (case-insensitive). Group 1 is the number, group 2 the unit. The number is
// a single run of digits/dots; nesting that run inside another quantifier
// would make the match time explode on long digit strings without a unit.
var cssLengthRelativeUnit = /^([.\d]*)(em|ex|px|gd|rem|vw|vh|vm|ch|mm|cm|in|pt|pc|deg|rad|ms|s|hz|khz)/i;
var emptyMarginRegex = /^(?:\b0[^\s]*\s*){1,4}$/;		// e.g. 0px 0pt 0px
var romanLiternalPattern = '^m{0,4}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})$',
	lowerRomanLiteralRegex = new RegExp( romanLiternalPattern ),
	upperRomanLiteralRegex = new RegExp( romanLiternalPattern.toUpperCase() );

// List marker text patterns, keyed by list tag name and then by list-style-type.
var orderedPatterns = { 'decimal' : /\d+/, 'lower-roman': lowerRomanLiteralRegex, 'upper-roman': upperRomanLiteralRegex, 'lower-alpha' : /^[a-z]+$/, 'upper-alpha': /^[A-Z]+$/ },
	unorderedPatterns = { 'disc' : /[l\u00B7\u2002]/, 'circle' : /[\u006F\u00D8]/,'square' : /[\u006E\u25C6]/},
	listMarkerPatterns = { 'ol' : orderedPatterns, 'ul' : unorderedPatterns },
	// Roman symbols paired with their values, largest first.
	romans = [ [1000, 'M'], [900, 'CM'], [500, 'D'], [400, 'CD'], [100, 'C'], [90, 'XC'], [50, 'L'], [40, 'XL'], [10, 'X'], [9, 'IX'], [5, 'V'], [4, 'IV'], [1, 'I'] ],
	alpahbets = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

// Convert roman numbering back to decimal.
/**
 * @param {String} str Roman numeral in either letter case.
 * @returns {Number} The sum of the symbols consumed from the start of the
 * string; 0 when the string does not start with a roman symbol.
 */
function fromRoman( str )
{
	var rest = str.toUpperCase(),
		total = 0;

	for ( var i = 0; i < romans.length; ++i )
	{
		var value = romans[ i ][ 0 ],
			symbol = romans[ i ][ 1 ];

		// Consume the symbol for as long as it keeps leading the remainder.
		while ( rest.slice( 0, symbol.length ) == symbol )
		{
			total += value;
			rest = rest.slice( symbol.length );
		}
	}

	return total;
}
// Convert alphabet numbering back to decimal.
/**
 * Decodes an alphabetic list marker as a bijective base-26 number, i.e.
 * A=1 ... Z=26, AA=27, AB=28 ... which is the sequence browsers render for
 * the 'lower-alpha'/'upper-alpha' list style types.
 * @param {String} str Marker letters in either case.
 * @returns {Number} The ordinal of the marker; characters outside A-Z
 * contribute nothing to their position.
 */
function fromAlphabet( str )
{
	str = str.toUpperCase();

	var base = 26,
		retVal = 0;

	for ( var i = 0; i < str.length; i++ )
	{
		// 'A' has char code 65, so this maps A-Z onto 1-26.
		var digit = str.charCodeAt( i ) - 64;
		if ( digit < 1 || digit > base )
			digit = 0;

		retVal = retVal * base + digit;
	}

	return retVal;
}
// State shared between successive list item resolutions:
// the margin increment taken as one indent level,
var listBaseIndent = 0;
// the left margin of the previously resolved list item,
var previousListItemMargin = null;
// and the 'mso-list' id of the previously resolved list item.
var previousListId;
213 var plugin
= ( CKEDITOR
.plugins
.pastefromword
=
217 // Create a <cke:listbullet> which indicate an list item type.
218 createListBulletMarker : function ( bullet
, bulletText
)
220 var marker
= new CKEDITOR
.htmlParser
.element( 'cke:listbullet' );
221 marker
.attributes
= { 'cke:listsymbol' : bullet
[ 0 ] };
222 marker
.add( new CKEDITOR
.htmlParser
.text( bulletText
) );
226 isListBulletIndicator : function( element
)
228 var styleText
= element
.attributes
&& element
.attributes
.style
;
229 if ( /mso-list\s*:\s*Ignore/i.test( styleText
) )
233 isContainingOnlySpaces : function( element
)
236 return ( ( text
= element
.onlyChild() )
237 && ( /^(:?\s| )+$/ ).test( text
.value
) );
240 resolveList : function( element
)
242 // <cke:listbullet> indicate a list item.
243 var attrs
= element
.attributes
,
246 if ( ( listMarker
= element
.removeAnyChildWithName( 'cke:listbullet' ) )
248 && ( listMarker
= listMarker
[ 0 ] ) )
250 element
.name
= 'cke:li';
254 attrs
.style
= plugin
.filters
.stylesFilter(
256 // Text-indent is not representing list item level any more.
259 // First attempt is to resolve indent level from on a constant margin increment.
260 [ ( /^margin(:?-left)?$/ ), null, function( margin
)
262 // Deal with component/short-hand form.
263 var values
= margin
.split( ' ' );
264 margin
= CKEDITOR
.tools
.convertToPx( values
[ 3 ] || values
[ 1 ] || values
[ 0 ] );
266 // Figure out the indent unit by checking the first time of incrementation.
267 if ( !listBaseIndent
&& previousListItemMargin
!== null && margin
> previousListItemMargin
)
268 listBaseIndent
= margin
- previousListItemMargin
;
270 previousListItemMargin
= margin
;
272 attrs
[ 'cke:indent' ] = listBaseIndent
&& ( Math
.ceil( margin
/ listBaseIndent
) + 1 ) || 1;
274 // The best situation: "mso-list:l0 level1 lfo2" tells the belonged list root, list item indentation, etc.
275 [ ( /^mso-list$/ ), null, function( val
)
277 val
= val
.split( ' ' );
278 var listId
= Number( val
[ 0 ].match( /\d+/ ) ),
279 indent
= Number( val
[ 1 ].match( /\d+/ ) );
283 listId
!== previousListId
&& ( attrs
[ 'cke:reset' ] = 1 );
284 previousListId
= listId
;
286 attrs
[ 'cke:indent' ] = indent
;
288 ] )( attrs
.style
, element
) || '';
291 // First level list item might be presented without a margin.
294 // In case all above doesn't apply.
295 if ( !attrs
[ 'cke:indent' ] )
297 previousListItemMargin
= 0;
298 attrs
[ 'cke:indent' ] = 1;
301 // Inherit attributes from bullet.
302 CKEDITOR
.tools
.extend( attrs
, listMarker
.attributes
);
305 // Current list disconnected.
307 previousListId
= previousListItemMargin
= listBaseIndent
= null;
312 // Providing a shorthand style then retrieve one or more style component values.
313 getStyleComponents
: ( function()
315 var calculator
= CKEDITOR
.dom
.element
.createFromHtml(
316 '<div style="position:absolute;left:-9999px;top:-9999px;"></div>',
318 CKEDITOR
.document
.getBody().append( calculator
);
320 return function( name
, styleValue
, fetchList
)
322 calculator
.setStyle( name
, styleValue
);
324 count
= fetchList
.length
;
325 for ( var i
= 0; i
< count
; i
++ )
326 styles
[ fetchList
[ i
] ] = calculator
.getStyle( fetchList
[ i
] );
332 listDtdParents
: CKEDITOR
.dtd
.parentOf( 'ol' )
337 // Transform a normal list into flat list items only presentation.
338 // E.g. <ul><li>level1<ol><li>level2</li></ol></li> =>
339 // <cke:li cke:listtype="ul" cke:indent="1">level1</cke:li>
340 // <cke:li cke:listtype="ol" cke:indent="2">level2</cke:li>
341 flattenList : function( element
, level
)
343 level
= typeof level
== 'number' ? level
: 1;
345 var attrs
= element
.attributes
,
348 // All list items are of the same type.
349 switch ( attrs
.type
)
352 listStyleType
= 'lower-alpha';
355 listStyleType
= 'decimal';
357 // TODO: Support more list style type from MS-Word.
360 var children
= element
.children
,
363 for ( var i
= 0; i
< children
.length
; i
++ )
365 child
= children
[ i
];
367 if ( child
.name
in CKEDITOR
.dtd
.$listItem
)
369 var attributes
= child
.attributes
,
370 listItemChildren
= child
.children
,
371 count
= listItemChildren
.length
,
372 last
= listItemChildren
[ count
- 1 ];
374 // Move out nested list.
375 if ( last
.name
in CKEDITOR
.dtd
.$list
)
377 element
.add( last
, i
+ 1 );
379 // Remove the parent list item if it's just a holder.
380 if ( !--listItemChildren
.length
)
381 children
.splice( i
--, 1 );
384 child
.name
= 'cke:li';
386 // Inherit numbering from list root on the first list item.
387 attrs
.start
&& !i
&& ( attributes
.value
= attrs
.start
);
389 plugin
.filters
.stylesFilter(
391 [ 'tab-stops', null, function( val
)
393 var margin
= val
.split( ' ' )[ 1 ].match( cssLengthRelativeUnit
);
394 margin
&& ( previousListItemMargin
= CKEDITOR
.tools
.convertToPx( margin
[ 0 ] ) );
396 ( level
== 1 ? [ 'mso-list', null, function( val
)
398 val
= val
.split( ' ' );
399 var listId
= Number( val
[ 0 ].match( /\d+/ ) );
400 listId
!== previousListId
&& ( attributes
[ 'cke:reset' ] = 1 );
401 previousListId
= listId
;
403 ] )( attributes
.style
);
405 attributes
[ 'cke:indent' ] = level
;
406 attributes
[ 'cke:listtype' ] = element
.name
;
407 attributes
[ 'cke:list-style-type' ] = listStyleType
;
410 else if ( child
.name
in CKEDITOR
.dtd
.$list
)
412 // Absorb sub list children.
413 arguments
.callee
.apply( this, [ child
, level
+ 1 ] );
414 children
= children
.slice( 0, i
).concat( child
.children
).concat( children
.slice( i
+ 1 ) );
415 element
.children
= [];
416 for ( var j
= 0, num
= children
.length
; j
< num
; j
++ )
417 element
.add( children
[ j
] );
423 // We're loosing tag name here, signalize this element as a list.
424 attrs
[ 'cke:list' ] = 1;
428 * Try to collect all list items among the children and establish one
429 * or more HTML list structures for them.
432 assembleList : function( element
)
434 var children
= element
.children
, child
,
435 listItem
, // The current processing cke:li element.
437 listItemIndent
, // Indent level of current list item.
439 lastListItem
, // The previous one just been added to the list.
440 list
, // Current staging list and it's parent list if any.
442 previousListStyleType
,
445 // Properties of the list item are to be resolved from the list bullet.
451 for ( var i
= 0; i
< children
.length
; i
++ )
453 child
= children
[ i
];
455 if ( 'cke:li' == child
.name
)
459 listItemAttrs
= listItem
.attributes
;
460 bullet
= listItemAttrs
[ 'cke:listsymbol' ];
461 bullet
= bullet
&& bullet
.match( /^(?:[(]?)([^\s]+?)([.)]?)$/ );
462 listType
= listStyleType
= itemNumeric
= null;
464 if ( listItemAttrs
[ 'cke:ignored' ] )
466 children
.splice( i
--, 1 );
471 // This's from a new list root.
472 listItemAttrs
[ 'cke:reset' ] && ( list
= lastIndent
= lastListItem
= null );
474 // List item indent level might come from a real list indentation or
475 // been resolved from a pseudo list item's margin value, even get
476 // no indentation at all.
477 listItemIndent
= Number( listItemAttrs
[ 'cke:indent' ] );
479 // We're moving out of the current list, cleaning up.
480 if ( listItemIndent
!= lastIndent
)
481 previousListType
= previousListStyleType
= null;
483 // List type and item style are already resolved.
486 listType
= listItemAttrs
[ 'cke:listtype' ] || 'ol';
487 listStyleType
= listItemAttrs
[ 'cke:list-style-type' ];
491 // Probably share the same list style type with previous list item,
492 // give it priority to avoid ambiguous between C(Alpha) and C.(Roman).
493 if ( previousListType
&& listMarkerPatterns
[ previousListType
] [ previousListStyleType
].test( bullet
[ 1 ] ) )
495 listType
= previousListType
;
496 listStyleType
= previousListStyleType
;
500 for ( var type
in listMarkerPatterns
)
502 for ( var style
in listMarkerPatterns
[ type
] )
504 if ( listMarkerPatterns
[ type
][ style
].test( bullet
[ 1 ] ) )
506 // Small numbering has higher priority, when dealing with ambiguous
507 // between C(Alpha) and C.(Roman).
508 if ( type
== 'ol' && ( /alpha|roman/ ).test( style
) )
510 var num
= /roman/.test( style
) ? fromRoman( bullet
[ 1 ] ) : fromAlphabet( bullet
[ 1 ] );
511 if ( !itemNumeric
|| num
< itemNumeric
)
515 listStyleType
= style
;
521 listStyleType
= style
;
529 // Simply use decimal/disc for the rest forms of unrepresentable
530 // numerals, e.g. Chinese..., but as long as there a second part
531 // included, it has a bigger chance of being a order list ;)
532 !listType
&& ( listType
= bullet
[ 2 ] ? 'ol' : 'ul' );
535 previousListType
= listType
;
536 previousListStyleType
= listStyleType
|| ( listType
== 'ol' ? 'decimal' : 'disc' );
537 if ( listStyleType
&& listStyleType
!= ( listType
== 'ol' ? 'decimal' : 'disc' ) )
538 listItem
.addStyle( 'list-style-type', listStyleType
);
540 // Figure out start numbering.
541 if ( listType
== 'ol' && bullet
)
543 switch ( listStyleType
)
546 itemNumeric
= Number( bullet
[ 1 ] );
550 itemNumeric
= fromRoman( bullet
[ 1 ] );
554 itemNumeric
= fromAlphabet( bullet
[ 1 ] );
558 // Always create the numbering, swipe out unnecessary ones later.
559 listItem
.attributes
.value
= itemNumeric
;
562 // Start the list construction.
565 openedLists
.push( list
= new CKEDITOR
.htmlParser
.element( listType
) );
566 list
.add( listItem
);
567 children
[ i
] = list
;
571 if ( listItemIndent
> lastIndent
)
573 openedLists
.push( list
= new CKEDITOR
.htmlParser
.element( listType
) );
574 list
.add( listItem
);
575 lastListItem
.add( list
);
577 else if ( listItemIndent
< lastIndent
)
579 // There might be a negative gap between two list levels. (#4944)
580 var diff
= lastIndent
- listItemIndent
,
582 while ( diff
-- && ( parent
= list
.parent
) )
583 list
= parent
.parent
;
585 list
.add( listItem
);
588 list
.add( listItem
);
590 children
.splice( i
--, 1 );
593 lastListItem
= listItem
;
594 lastIndent
= listItemIndent
;
597 list
= lastIndent
= lastListItem
= null;
600 for ( i
= 0; i
< openedLists
.length
; i
++ )
601 postProcessList( openedLists
[ i
] );
603 list
= lastIndent
= lastListItem
= previousListId
= previousListItemMargin
= listBaseIndent
= null;
607 * A simple filter which always rejecting.
609 falsyFilter : function( value
)
615 * A filter dedicated on the 'style' attribute filtering, e.g. dropping/replacing style properties.
616 * @param styles {Array} in form of [ styleNameRegexp, styleValueRegexp,
617 * newStyleValue/newStyleGenerator, newStyleName ] where only the first
618 * parameter is mandatory.
619 * @param whitelist {Boolean} Whether the {@param styles} will be considered as a white-list.
621 stylesFilter : function( styles
, whitelist
)
623 return function( styleText
, element
)
626 // html-encoded quote might be introduced by 'font-family'
627 // from MS-Word which confused the following regexp. e.g.
628 //'font-family: "Lucida, Console"'
630 .replace( /"/g, '"' )
631 .replace( /\s*([^ :;]+)\s*:\s*([^;]+)\s*(?=;|$)/g,
632 function( match
, name
, value
)
634 name
= name
.toLowerCase();
635 name
== 'font-family' && ( value
= value
.replace( /["']/g, '' ) );
641 for ( var i
= 0 ; i
< styles
.length
; i
++ )
645 namePattern
= styles
[ i
][ 0 ];
646 valuePattern
= styles
[ i
][ 1 ];
647 newValue
= styles
[ i
][ 2 ];
648 newName
= styles
[ i
][ 3 ];
650 if ( name
.match( namePattern
)
651 && ( !valuePattern
|| value
.match( valuePattern
) ) )
653 name
= newName
|| name
;
654 whitelist
&& ( newValue
= newValue
|| value
);
656 if ( typeof newValue
== 'function' )
657 newValue
= newValue( value
, element
, name
);
659 // Return an couple indicate both name and value
661 if ( newValue
&& newValue
.push
)
662 name
= newValue
[ 0 ], newValue
= newValue
[ 1 ];
664 if ( typeof newValue
== 'string' )
665 rules
.push( [ name
, newValue
] );
671 !whitelist
&& rules
.push( [ name
, value
] );
675 for ( var i
= 0 ; i
< rules
.length
; i
++ )
676 rules
[ i
] = rules
[ i
].join( ':' );
677 return rules
.length
?
678 ( rules
.join( ';' ) + ';' ) : false;
683 * Migrate the element by decorate styles on it.
684 * @param styleDefiniton
687 elementMigrateFilter : function ( styleDefiniton
, variables
)
689 return function( element
)
693 new CKEDITOR
.style( styleDefiniton
, variables
)._
.definition
695 element
.name
= styleDef
.element
;
696 CKEDITOR
.tools
.extend( element
.attributes
, CKEDITOR
.tools
.clone( styleDef
.attributes
) );
697 element
.addStyle( CKEDITOR
.style
.getStyleText( styleDef
) );
702 * Migrate styles by creating a new nested stylish element.
703 * @param styleDefinition
705 styleMigrateFilter : function( styleDefinition
, variableName
)
708 var elementMigrateFilter
= this.elementMigrateFilter
;
709 return function( value
, element
)
711 // Build an stylish element first.
712 var styleElement
= new CKEDITOR
.htmlParser
.element( null ),
715 variables
[ variableName
] = value
;
716 elementMigrateFilter( styleDefinition
, variables
)( styleElement
);
717 // Place the new element inside the existing span.
718 styleElement
.children
= element
.children
;
719 element
.children
= [ styleElement
];
724 * A filter which remove cke-namespaced-attribute on
725 * all none-cke-namespaced elements.
729 bogusAttrFilter : function( value
, element
)
731 if ( element
.name
.indexOf( 'cke:' ) == -1 )
736 * A filter which will be used to apply inline css style according the stylesheet
737 * definition rules, is generated lazily when filtering.
739 applyStyleFilter
: null
743 getRules : function( editor
)
745 var dtd
= CKEDITOR
.dtd
,
746 blockLike
= CKEDITOR
.tools
.extend( {}, dtd
.$block
, dtd
.$listItem
, dtd
.$tableContent
),
747 config
= editor
.config
,
748 filters
= this.filters
,
749 falsyFilter
= filters
.falsyFilter
,
750 stylesFilter
= filters
.stylesFilter
,
751 elementMigrateFilter
= filters
.elementMigrateFilter
,
752 styleMigrateFilter
= CKEDITOR
.tools
.bind( this.filters
.styleMigrateFilter
, this.filters
),
753 createListBulletMarker
= this.utils
.createListBulletMarker
,
754 flattenList
= filters
.flattenList
,
755 assembleList
= filters
.assembleList
,
756 isListBulletIndicator
= this.utils
.isListBulletIndicator
,
757 containsNothingButSpaces
= this.utils
.isContainingOnlySpaces
,
758 resolveListItem
= this.utils
.resolveList
,
759 convertToPx = function( value
)
761 value
= CKEDITOR
.tools
.convertToPx( value
);
762 return isNaN( value
) ? value
: value
+ 'px';
764 getStyleComponents
= this.utils
.getStyleComponents
,
765 listDtdParents
= this.utils
.listDtdParents
,
766 removeFontStyles
= config
.pasteFromWordRemoveFontStyles
!== false,
767 removeStyles
= config
.pasteFromWordRemoveStyles
!== false;
773 // Remove script, meta and link elements.
774 [ ( /meta|link|script/ ), '' ]
777 root : function( element
)
779 element
.filterChildren();
780 assembleList( element
);
785 '^' : function( element
)
787 // Transform CSS style declaration to inline style.
788 var applyStyleFilter
;
789 if ( CKEDITOR
.env
.gecko
&& ( applyStyleFilter
= filters
.applyStyleFilter
) )
790 applyStyleFilter( element
);
793 $ : function( element
)
795 var tagName
= element
.name
|| '',
796 attrs
= element
.attributes
;
798 // Convert length unit of width/height on blocks to
799 // a more editor-friendly way (px).
800 if ( tagName
in blockLike
803 attrs
.style
= stylesFilter(
804 [ [ ( /^(:?width|height)$/ ), null, convertToPx
] ] )( attrs
.style
) || '';
807 // Processing headings.
808 if ( tagName
.match( /h\d/ ) )
810 element
.filterChildren();
811 // Is the heading actually a list item?
812 if ( resolveListItem( element
) )
815 // Adapt heading styles to editor's convention.
816 elementMigrateFilter( config
[ 'format_' + tagName
] )( element
);
818 // Remove inline elements which contain only empty spaces.
819 else if ( tagName
in dtd
.$inline
)
821 element
.filterChildren();
822 if ( containsNothingButSpaces( element
) )
825 // Remove element with ms-office namespace,
826 // with it's content preserved, e.g. 'o:p'.
827 else if ( tagName
.indexOf( ':' ) != -1
828 && tagName
.indexOf( 'cke' ) == -1 )
830 element
.filterChildren();
832 // Restore image real link from vml.
833 if ( tagName
== 'v:imagedata' )
835 var href
= element
.attributes
[ 'o:href' ];
837 element
.attributes
.src
= href
;
838 element
.name
= 'img';
844 // Assembling list items into a whole list.
845 if ( tagName
in listDtdParents
)
847 element
.filterChildren();
848 assembleList( element
);
852 // We'll drop any style sheet, but Firefox conclude
853 // certain styles in a single style element, which are
854 // required to be changed into inline ones.
855 'style' : function( element
)
857 if ( CKEDITOR
.env
.gecko
)
859 // Grab only the style definition section.
860 var styleDefSection
= element
.onlyChild().value
.match( /\/\* Style Definitions \*\/([\s\S]*?)\/\*/ ),
861 styleDefText
= styleDefSection
&& styleDefSection
[ 1 ],
862 rules
= {}; // Storing the parsed result.
867 // Remove line-breaks.
868 .replace(/[\n\r]/g,'')
869 // Extract selectors and style properties.
870 .replace( /(.+?)\{(.+?)\}/g,
871 function( rule
, selectors
, styleBlock
)
873 selectors
= selectors
.split( ',' );
874 var length
= selectors
.length
, selector
;
875 for ( var i
= 0; i
< length
; i
++ )
877 // Assume MS-Word mostly generate only simple
878 // selector( [Type selector][Class selector]).
879 CKEDITOR
.tools
.trim( selectors
[ i
] )
880 .replace( /^(\w+)(\.[\w-]+)?$/g,
881 function( match
, tagName
, className
)
883 tagName
= tagName
|| '*';
884 className
= className
.substring( 1, className
.length
);
886 // Reject MS-Word Normal styles.
887 if ( className
.match( /MsoNormal/ ) )
890 if ( !rules
[ tagName
] )
891 rules
[ tagName
] = {};
893 rules
[ tagName
][ className
] = styleBlock
;
895 rules
[ tagName
] = styleBlock
;
900 filters
.applyStyleFilter = function( element
)
902 var name
= rules
[ '*' ] ? '*' : element
.name
,
903 className
= element
.attributes
&& element
.attributes
[ 'class' ],
907 style
= rules
[ name
];
908 if ( typeof style
== 'object' )
909 style
= style
[ className
];
910 // Maintain style rules priorities.
911 style
&& element
.addStyle( style
, true );
919 'p' : function( element
)
921 // This's a fall-back approach to recognize list item in FF3.6,
922 // as it's not perfect as not all list style (e.g. "heading list") is shipped
923 // with this pattern. (#6662)
924 if ( /MsoListParagraph/.exec( element
.attributes
[ 'class' ] ) )
926 var bulletText
= element
.firstChild( function( node
)
928 return node
.type
== CKEDITOR
.NODE_TEXT
&& !containsNothingButSpaces( node
.parent
);
930 var bullet
= bulletText
&& bulletText
.parent
,
931 bulletAttrs
= bullet
&& bullet
.attributes
;
932 bulletAttrs
&& !bulletAttrs
.style
&& ( bulletAttrs
.style
= 'mso-list: Ignore;' );
935 element
.filterChildren();
937 // Is the paragraph actually a list item?
938 if ( resolveListItem( element
) )
941 // Adapt paragraph formatting to editor's convention
942 // according to enter-mode.
943 if ( config
.enterMode
== CKEDITOR
.ENTER_BR
)
945 // We suffer from attribute/style lost in this situation.
947 element
.add( new CKEDITOR
.htmlParser
.element( 'br' ) );
950 elementMigrateFilter( config
[ 'format_' + ( config
.enterMode
== CKEDITOR
.ENTER_P
? 'p' : 'div' ) ] )( element
);
953 'div' : function( element
)
955 // Aligned table with no text surrounded is represented by a wrapper div, from which
956 // table cells inherit as text-align styles, which is wrong.
957 // Instead we use a clear-float div after the table to properly achieve the same layout.
958 var singleChild
= element
.onlyChild();
959 if ( singleChild
&& singleChild
.name
== 'table' )
961 var attrs
= element
.attributes
;
962 singleChild
.attributes
= CKEDITOR
.tools
.extend( singleChild
.attributes
, attrs
);
963 attrs
.style
&& singleChild
.addStyle( attrs
.style
);
965 var clearFloatDiv
= new CKEDITOR
.htmlParser
.element( 'div' );
966 clearFloatDiv
.addStyle( 'clear' ,'both' );
967 element
.add( clearFloatDiv
);
972 'td' : function ( element
)
974 // 'td' in 'thead' is actually <th>.
975 if ( element
.getAncestor( 'thead') )
979 // MS-Word sometimes present list as a mixing of normal list
980 // and pseudo-list, normalize the previous ones into pseudo form.
985 'font' : function( element
)
987 // Drop the font tag if it comes from list bullet text.
988 if ( isListBulletIndicator( element
.parent
) )
994 element
.filterChildren();
996 var attrs
= element
.attributes
,
997 styleText
= attrs
.style
,
998 parent
= element
.parent
;
1000 if ( 'font' == parent
.name
) // Merge nested <font> tags.
1002 CKEDITOR
.tools
.extend( parent
.attributes
,
1003 element
.attributes
);
1004 styleText
&& parent
.addStyle( styleText
);
1005 delete element
.name
;
1007 // Convert the merged into a span with all attributes preserved.
1010 styleText
= styleText
|| '';
1011 // IE's having those deprecated attributes, normalize them.
1014 attrs
.color
!= '#000000' && ( styleText
+= 'color:' + attrs
.color
+ ';' );
1019 styleText
+= 'font-family:' + attrs
.face
+ ';';
1022 // TODO: Mapping size in ranges of xx-small,
1023 // x-small, small, medium, large, x-large, xx-large.
1026 styleText
+= 'font-size:' +
1027 ( attrs
.size
> 3 ? 'large'
1028 : ( attrs
.size
< 3 ? 'small' : 'medium' ) ) + ';';
1032 element
.name
= 'span';
1033 element
.addStyle( styleText
);
1037 'span' : function( element
)
1039 // Remove the span if it comes from list bullet text.
1040 if ( isListBulletIndicator( element
.parent
) )
1043 element
.filterChildren();
1044 if ( containsNothingButSpaces( element
) )
1046 delete element
.name
;
1050 // List item bullet type is supposed to be indicated by
1051 // the text of a span with style 'mso-list : Ignore' or an image.
1052 if ( isListBulletIndicator( element
) )
1054 var listSymbolNode
= element
.firstChild( function( node
)
1056 return node
.value
|| node
.name
== 'img';
1059 var listSymbol
= listSymbolNode
&& ( listSymbolNode
.value
|| 'l.' ),
1060 listType
= listSymbol
&& listSymbol
.match( /^(?:[(]?)([^\s]+?)([.)]?)$/ );
1064 var marker
= createListBulletMarker( listType
, listSymbol
);
1065 // Some non-existed list items might be carried by an inconsequential list, indicate by "mso-hide:all/display:none",
1066 // those are to be removed later, now mark it with "cke:ignored".
1067 var ancestor
= element
.getAncestor( 'span' );
1068 if ( ancestor
&& (/ mso
-hide
:\s
*all
|display
:\s
*none
/).test( ancestor
.attributes
.style
) )
1069 marker
.attributes
[ 'cke:ignored' ] = 1;
1074 // Update the src attribute of image element with href.
1075 var children
= element
.children
,
1076 attrs
= element
.attributes
,
1077 styleText
= attrs
&& attrs
.style
,
1078 firstChild
= children
&& children
[ 0 ];
1080 // Assume MS-Word mostly carry font related styles on <span>,
1081 // adapting them to editor's convention.
1084 attrs
.style
= stylesFilter(
1086 // Drop 'inline-height' style which make lines overlapping.
1088 [ ( /^font-family$/ ), null, !removeFontStyles
? styleMigrateFilter( config
[ 'font_style' ], 'family' ) : null ] ,
1089 [ ( /^font-size$/ ), null, !removeFontStyles
? styleMigrateFilter( config
[ 'fontSize_style' ], 'size' ) : null ] ,
1090 [ ( /^color$/ ), null, !removeFontStyles
? styleMigrateFilter( config
[ 'colorButton_foreStyle' ], 'color' ) : null ] ,
1091 [ ( /^background-color$/ ), null, !removeFontStyles
? styleMigrateFilter( config
[ 'colorButton_backStyle' ], 'color' ) : null ]
1092 ] )( styleText
, element
) || '';
1098 // Migrate basic style formats to editor configured ones.
1099 'b' : elementMigrateFilter( config
[ 'coreStyles_bold' ] ),
1100 'i' : elementMigrateFilter( config
[ 'coreStyles_italic' ] ),
1101 'u' : elementMigrateFilter( config
[ 'coreStyles_underline' ] ),
1102 's' : elementMigrateFilter( config
[ 'coreStyles_strike' ] ),
1103 'sup' : elementMigrateFilter( config
[ 'coreStyles_superscript' ] ),
1104 'sub' : elementMigrateFilter( config
[ 'coreStyles_subscript' ] ),
1105 // Editor doesn't support anchor with content currently (#3582),
1106 // drop such anchors with content preserved.
1107 'a' : function( element
)
1109 var attrs
= element
.attributes
;
1110 if ( attrs
&& !attrs
.href
&& attrs
.name
)
1111 delete element
.name
;
1112 else if ( CKEDITOR
.env
.webkit
&& attrs
.href
&& attrs
.href
.match( /file:\/\/\/[\S]+#/i ) )
1113 attrs
.href
= attrs
.href
.replace( /file:\/\/\/[^#]+/i,'' );
1115 'cke:listbullet' : function( element
)
1117 if ( element
.getAncestor( /h\d/ ) && !config
.pasteFromWordNumberedHeadingToList
)
1118 delete element
.name
;
1124 // Remove onmouseover and onmouseout events (from MS Word comments effect)
1125 [ ( /^onmouse(:?out|over)/ ), '' ],
1126 // Onload on image element.
1127 [ ( /^onload$/ ), '' ],
1128 // Remove office and vml attribute from elements.
1129 [ ( /(?:v|o):\w+/ ), '' ],
1130 // Remove lang/language attributes.
1136 'style' : stylesFilter(
1138 // Provide a white-list of styles that we preserve, those should
1139 // be the ones that could later be altered with editor tools.
1141 // Leave list-style-type
1142 [ ( /^list-style-type$/ ), null ],
1144 // Preserve margin-left/right which used as default indent style in the editor.
1145 [ ( /^margin$|^margin-(?!bottom|top)/ ), null, function( value
, element
, name
)
1147 if ( element
.name
in { p
: 1, div
: 1 } )
1149 var indentStyleName
= config
.contentsLangDirection
== 'ltr' ?
1150 'margin-left' : 'margin-right';
1152 // Extract component value from 'margin' shorthand.
1153 if ( name
== 'margin' )
1155 value
= getStyleComponents( name
, value
,
1156 [ indentStyleName
] )[ indentStyleName
];
1158 else if ( name
!= indentStyleName
)
1161 if ( value
&& !emptyMarginRegex
.test( value
) )
1162 return [ indentStyleName
, value
];
1168 // Preserve clear float style.
1171 [ ( /^border.*|margin.*|vertical-align|float$/ ), null,
1172 function( value
, element
)
1174 if ( element
.name
== 'img' )
1178 [ (/^width|height$/ ), null,
1179 function( value
, element
)
1181 if ( element
.name
in { table
: 1, td
: 1, th
: 1, img
: 1 } )
1185 // Otherwise provide a black-list of styles that we remove.
1188 // Fixing color values.
1189 [ ( /-color$/ ), null, function( value
)
1191 if ( value
== 'transparent' )
1193 if ( CKEDITOR
.env
.gecko
)
1194 return value
.replace( /-moz-use-text-color/g, 'transparent' );
1196 // Remove empty margin values, e.g. 0.00001pt 0em 0pt
1197 [ ( /^margin$/ ), emptyMarginRegex
],
1198 [ 'text-indent', '0cm' ],
1199 [ 'page-break-before' ],
1201 [ 'display', 'none' ],
1202 removeFontStyles
? [ ( /font-?/ ) ] : null
1205 // Prefer width styles over 'width' attributes.
1206 'width' : function( value
, element
)
1208 if ( element
.name
in dtd
.$tableContent
)
1211 // Prefer border styles over table 'border' attributes.
1212 'border' : function( value
, element
)
1214 if ( element
.name
in dtd
.$tableContent
)
1218 // Only Firefox carry style sheet from MS-Word, which
1219 // will be applied by us manually. For other browsers
1220 // the css className is useless.
1221 'class' : falsyFilter
,
1223 // MS-Word always generate 'background-color' along with 'bgcolor',
1224 // simply drop the deprecated attributes.
1225 'bgcolor' : falsyFilter
,
1227 // Deprecate 'valign' attribute in favor of 'vertical-align'.
1228 'valign' : removeStyles
? falsyFilter : function( value
, element
)
1230 element
.addStyle( 'vertical-align', value
);
1235 // Fore none-IE, some useful data might be buried under these IE-conditional
1236 // comments where RegExp were the right approach to dig them out where usual approach
1237 // is transform it into a fake element node which hold the desired data.
1240 function( value
, node
)
1242 var imageInfo
= value
.match( /<img.*?>/ ),
1243 listInfo
= value
.match( /^\[if !supportLists\]([\s\S]*?)\[endif\]$/ );
1245 // Seek for list bullet indicator.
1248 // Bullet symbol could be either text or an image.
1249 var listSymbol
= listInfo
[ 1 ] || ( imageInfo
&& 'l.' ),
1250 listType
= listSymbol
&& listSymbol
.match( />(?:[(]?)([^\s]+?)([.)]?)</ );
1251 return createListBulletMarker( listType
, listSymbol
);
1254 // Reveal the <img> element in conditional comments for Firefox.
1255 if ( CKEDITOR
.env
.gecko
&& imageInfo
)
1257 var img
= CKEDITOR
.htmlParser
.fragment
.fromHtml( imageInfo
[ 0 ] ).children
[ 0 ],
1258 previousComment
= node
.previous
,
1259 // Try to dig the real image link from vml markup from previous comment text.
1260 imgSrcInfo
= previousComment
&& previousComment
.value
.match( /<v:imagedata[^>]*o:href=['"](.*?)['"]/ ),
1261 imgSrc
= imgSrcInfo
&& imgSrcInfo
[ 1 ];
1263 // Is there a real 'src' url to be used?
1264 imgSrc
&& ( img
.attributes
.src
= imgSrc
);
// The paste processor here is just a reduced copy of html data processor.
// Each instance owns a fresh html parser filter in 'dataFilter'.
var pasteProcessor = function()
{
	this.dataFilter = new CKEDITOR.htmlParser.filter();
};

pasteProcessor.prototype =
{
	/**
	 * Parses the html, runs it through this.dataFilter and serializes it again.
	 * @param {String} data The html to process.
	 * @returns The html produced by the writer.
	 */
	toHtml : function( data )
	{
		var htmlParser = CKEDITOR.htmlParser,
			parsed = htmlParser.fragment.fromHtml( data, false ),
			output = new htmlParser.basicWriter();

		parsed.writeHtml( output, this.dataFilter );

		return output.getHtml( true );
	}
};
/**
 * Cleans up html pasted from MS-Word.
 * @param {String} data The pasted html.
 * @param editor The editor instance; supplies the filter rules, receives the
 * 'beforeCleanWord' event and provides the error message.
 * @returns {String} The cleaned html.
 */
CKEDITOR.cleanWord = function( data, editor )
{
	// Firefox will be confused by those downlevel-revealed IE conditional
	// comments, fixing them first( convert it to upperlevel-revealed one ).
	// e.g. <![if !vml]>...<![endif]>
	if ( CKEDITOR.env.gecko )
	{
		data = data.replace( /(<!--\[if[^<]*?\])-->([\S\s]*?)<!--(\[endif\]-->)/gi, '$1$2$3' );
	}

	var processor = new pasteProcessor(),
		wordFilter = processor.dataFilter;

	// These rules will have higher priorities than default ones.
	wordFilter.addRules( CKEDITOR.plugins.pastefromword.getRules( editor ) );

	// Allow extending data filter rules.
	editor.fire( 'beforeCleanWord', { filter : wordFilter } );

	try
	{
		data = processor.toHtml( data, false );
	}
	catch ( e )
	{
		// On failure the user is told and the unprocessed data goes on to
		// the post processing below.
		alert( editor.lang.pastefromword.error );
	}

	/* Below post processing those things that are unable to delivered by filter rules. */
	return data
		// Remove 'cke' namespaced attribute used in filter rules as marker.
		.replace( /cke:.*?".*?"/g, '' )
		// Remove empty style attribute.
		.replace( /style=""/g, '' )
		// Remove the dummy spans ( having no inline style ).
		.replace( /<span>/g, '' );
};
1335 * Whether to ignore all font related formatting styles, including:
1336 * <ul> <li>font size;</li>
1337 * <li>font family;</li>
1338 * <li>font foreground/background color.</li></ul>
1339 * @name CKEDITOR.config.pasteFromWordRemoveFontStyles
1344 * config.pasteFromWordRemoveFontStyles = false;
1348 * Whether to transform MS Word outline numbered headings into lists.
1349 * @name CKEDITOR.config.pasteFromWordNumberedHeadingToList
1354 * config.pasteFromWordNumberedHeadingToList = true;
1358 * Whether to remove element styles that can't be managed with the editor. Note
1359 * that this doesn't handle the font specific styles, which depends on the
1360 * {@link CKEDITOR.config.pasteFromWordRemoveFontStyles} setting instead.
1361 * @name CKEDITOR.config.pasteFromWordRemoveStyles
1366 * config.pasteFromWordRemoveStyles = false;