Documentation ¶
Overview ¶
Package textextractor provides functionality to extract plain text from HWPX documents.
Index ¶
- Constants
- func CreateExtractor(objectType baseobject.ObjectType, manager comm.ExtractorManager, ...) comm.Extractor
- func Extract(hwpxFile *object.HWPXFile, method TextExtractMethod, insertParaHead bool, ...) string
- func ExtractFrom(from baseobject.HWPXObject, method TextExtractMethod, textMarks *TextMarks) string
- func ParaHeadNumber_ToString1(value int, format enumtype.NumberType1) string
- func ParaHeadNumber_ToString2(value int, format enumtype.NumberType2) string
- type FromCaption
- type FromContainer
- type FromDrawText
- type FromDrawingObject
- type FromFieldBegin
- type FromFieldEnd
- type FromHWPXFile
- type FromParaControl
- type FromParaWithAppendingControlTextAfterParagraphText
- type FromParaWithInsertingControlTextBetweenParagraphText
- type FromRun
- type FromSectionXMLFile
- type FromSubList
- type FromT
- type FromTable
- type FromTc
- type NoWorkingExtractor
- type ParaHeadMaker
- type ParaNumber
- type Parameter
- type TextExtractMethod
- type TextMarks
- func (m *TextMarks) ArcEndAnd(v string) *TextMarks
- func (m *TextMarks) ArcStartAnd(v string) *TextMarks
- func (m *TextMarks) ConnectLineEndAnd(v string) *TextMarks
- func (m *TextMarks) ConnectLineStartAnd(v string) *TextMarks
- func (m *TextMarks) ContainerEndAnd(v string) *TextMarks
- func (m *TextMarks) ContainerStartAnd(v string) *TextMarks
- func (m *TextMarks) CurveEndAnd(v string) *TextMarks
- func (m *TextMarks) CurveStartAnd(v string) *TextMarks
- func (m *TextMarks) EllipseEndAnd(v string) *TextMarks
- func (m *TextMarks) EllipseStartAnd(v string) *TextMarks
- func (m *TextMarks) FieldEndAnd(v string) *TextMarks
- func (m *TextMarks) FieldStartAnd(v string) *TextMarks
- func (m *TextMarks) GetArcEnd() string
- func (m *TextMarks) GetArcStart() string
- func (m *TextMarks) GetConnectLineEnd() string
- func (m *TextMarks) GetConnectLineStart() string
- func (m *TextMarks) GetContainerEnd() string
- func (m *TextMarks) GetContainerStart() string
- func (m *TextMarks) GetCurveEnd() string
- func (m *TextMarks) GetCurveStart() string
- func (m *TextMarks) GetEllipseEnd() string
- func (m *TextMarks) GetEllipseStart() string
- func (m *TextMarks) GetFieldEnd() string
- func (m *TextMarks) GetFieldStart() string
- func (m *TextMarks) GetLineBreak() string
- func (m *TextMarks) GetLineEnd() string
- func (m *TextMarks) GetLineStart() string
- func (m *TextMarks) GetParaSeparator() string
- func (m *TextMarks) GetPolygonEnd() string
- func (m *TextMarks) GetPolygonStart() string
- func (m *TextMarks) GetRectangleEnd() string
- func (m *TextMarks) GetRectangleStart() string
- func (m *TextMarks) GetTab() string
- func (m *TextMarks) GetTableCellSeparator() string
- func (m *TextMarks) GetTableEnd() string
- func (m *TextMarks) GetTableRowSeparator() string
- func (m *TextMarks) GetTableStart() string
- func (m *TextMarks) GetTextArtEnd() string
- func (m *TextMarks) GetTextArtStart() string
- func (m *TextMarks) LineBreakAnd(v string) *TextMarks
- func (m *TextMarks) LineEndAnd(v string) *TextMarks
- func (m *TextMarks) LineStartAnd(v string) *TextMarks
- func (m *TextMarks) ParaSeparatorAnd(v string) *TextMarks
- func (m *TextMarks) PolygonEndAnd(v string) *TextMarks
- func (m *TextMarks) PolygonStartAnd(v string) *TextMarks
- func (m *TextMarks) RectangleEndAnd(v string) *TextMarks
- func (m *TextMarks) RectangleStartAnd(v string) *TextMarks
- func (m *TextMarks) TabAnd(v string) *TextMarks
- func (m *TextMarks) TableCellSeparatorAnd(v string) *TextMarks
- func (m *TextMarks) TableEndAnd(v string) *TextMarks
- func (m *TextMarks) TableRowSeparatorAnd(v string) *TextMarks
- func (m *TextMarks) TableStartAnd(v string) *TextMarks
- func (m *TextMarks) TextArtStartAnd(v string) *TextMarks
Constants ¶
View Source
const LevelCount = 10
View Source
const OutlineStyleEngNamePrefix = "Outline "
Variables ¶
This section is empty.
Functions ¶
func CreateExtractor ¶
func CreateExtractor(objectType baseobject.ObjectType, manager comm.ExtractorManager, parameter comm.ParameterInterface) comm.Extractor
func Extract ¶
func Extract(hwpxFile *object.HWPXFile, method TextExtractMethod, insertParaHead bool, textMarks *TextMarks) string
Extract extracts all text from the given HWPXFile using the specified method and marks. If insertParaHead is true, it also includes paragraph numbers/bullets in the output.
func ExtractFrom ¶
func ExtractFrom(from baseobject.HWPXObject, method TextExtractMethod, textMarks *TextMarks) string
ExtractFrom extracts text starting from a specific HWPXObject within the document tree.
func ParaHeadNumber_ToString1 ¶
func ParaHeadNumber_ToString1(value int, format enumtype.NumberType1) string
func ParaHeadNumber_ToString2 ¶
func ParaHeadNumber_ToString2(value int, format enumtype.NumberType2) string
Types ¶
type FromCaption ¶
type FromCaption struct {
*comm.ExtractorBase
}
func NewFromCaption ¶
func NewFromCaption(manager comm.ExtractorManager, parameter comm.ParameterInterface) *FromCaption
func (*FromCaption) Extract ¶
func (f *FromCaption) Extract(from baseobject.HWPXObject)
func (*FromCaption) ObjectType ¶
func (f *FromCaption) ObjectType() baseobject.ObjectType
type FromContainer ¶
type FromContainer struct {
*comm.ExtractorBase
}
func NewFromContainer ¶
func NewFromContainer(manager comm.ExtractorManager, parameter comm.ParameterInterface) *FromContainer
func (*FromContainer) Extract ¶
func (f *FromContainer) Extract(from baseobject.HWPXObject)
func (*FromContainer) ObjectType ¶
func (f *FromContainer) ObjectType() baseobject.ObjectType
type FromDrawText ¶
type FromDrawText struct {
*comm.ExtractorBase
}
func NewFromDrawText ¶
func NewFromDrawText(manager comm.ExtractorManager, parameter comm.ParameterInterface) *FromDrawText
func (*FromDrawText) Extract ¶
func (f *FromDrawText) Extract(from baseobject.HWPXObject)
func (*FromDrawText) ObjectType ¶
func (f *FromDrawText) ObjectType() baseobject.ObjectType
type FromDrawingObject ¶
type FromDrawingObject struct {
*comm.ExtractorBase
// contains filtered or unexported fields
}
func NewFromDrawingObject ¶
func NewFromDrawingObject(manager comm.ExtractorManager, parameter comm.ParameterInterface, objectType baseobject.ObjectType) *FromDrawingObject
func (*FromDrawingObject) Extract ¶
func (f *FromDrawingObject) Extract(from baseobject.HWPXObject)
func (*FromDrawingObject) ObjectType ¶
func (f *FromDrawingObject) ObjectType() baseobject.ObjectType
type FromFieldBegin ¶
type FromFieldBegin struct {
*comm.ExtractorBase
}
func NewFromFieldBegin ¶
func NewFromFieldBegin(manager comm.ExtractorManager, parameter comm.ParameterInterface) *FromFieldBegin
func (*FromFieldBegin) Extract ¶
func (f *FromFieldBegin) Extract(from baseobject.HWPXObject)
func (*FromFieldBegin) ObjectType ¶
func (f *FromFieldBegin) ObjectType() baseobject.ObjectType
type FromFieldEnd ¶
type FromFieldEnd struct {
*comm.ExtractorBase
}
func NewFromFieldEnd ¶
func NewFromFieldEnd(manager comm.ExtractorManager, parameter comm.ParameterInterface) *FromFieldEnd
func (*FromFieldEnd) Extract ¶
func (f *FromFieldEnd) Extract(from baseobject.HWPXObject)
func (*FromFieldEnd) ObjectType ¶
func (f *FromFieldEnd) ObjectType() baseobject.ObjectType
type FromHWPXFile ¶
type FromHWPXFile struct {
*comm.ExtractorBase
}
func NewFromHWPXFile ¶
func NewFromHWPXFile(manager comm.ExtractorManager, parameter comm.ParameterInterface) *FromHWPXFile
func (*FromHWPXFile) Extract ¶
func (f *FromHWPXFile) Extract(from baseobject.HWPXObject)
func (*FromHWPXFile) ObjectType ¶
func (f *FromHWPXFile) ObjectType() baseobject.ObjectType
type FromParaControl ¶
type FromParaControl struct {
*comm.ExtractorBase
}
func NewFromParaControl ¶
func NewFromParaControl(manager comm.ExtractorManager, parameter comm.ParameterInterface) *FromParaControl
func (*FromParaControl) Extract ¶
func (f *FromParaControl) Extract(from baseobject.HWPXObject)
func (*FromParaControl) ObjectType ¶
func (f *FromParaControl) ObjectType() baseobject.ObjectType
type FromParaWithAppendingControlTextAfterParagraphText ¶
type FromParaWithAppendingControlTextAfterParagraphText struct {
*comm.ExtractorBase
}
func NewFromParaWithAppendingControlTextAfterParagraphText ¶
func NewFromParaWithAppendingControlTextAfterParagraphText(manager comm.ExtractorManager, parameter comm.ParameterInterface) *FromParaWithAppendingControlTextAfterParagraphText
func (*FromParaWithAppendingControlTextAfterParagraphText) Extract ¶
func (f *FromParaWithAppendingControlTextAfterParagraphText) Extract(from baseobject.HWPXObject)
func (*FromParaWithAppendingControlTextAfterParagraphText) ObjectType ¶
func (f *FromParaWithAppendingControlTextAfterParagraphText) ObjectType() baseobject.ObjectType
type FromParaWithInsertingControlTextBetweenParagraphText ¶
type FromParaWithInsertingControlTextBetweenParagraphText struct {
*comm.ExtractorBase
}
func NewFromParaWithInsertingControlTextBetweenParagraphText ¶
func NewFromParaWithInsertingControlTextBetweenParagraphText(manager comm.ExtractorManager, parameter comm.ParameterInterface) *FromParaWithInsertingControlTextBetweenParagraphText
func (*FromParaWithInsertingControlTextBetweenParagraphText) Extract ¶
func (f *FromParaWithInsertingControlTextBetweenParagraphText) Extract(from baseobject.HWPXObject)
func (*FromParaWithInsertingControlTextBetweenParagraphText) ObjectType ¶
func (f *FromParaWithInsertingControlTextBetweenParagraphText) ObjectType() baseobject.ObjectType
type FromRun ¶
type FromRun struct {
*comm.ExtractorBase
}
func NewFromRun ¶
func NewFromRun(manager comm.ExtractorManager, parameter comm.ParameterInterface) *FromRun
func (*FromRun) Extract ¶
func (f *FromRun) Extract(from baseobject.HWPXObject)
func (*FromRun) ObjectType ¶
func (f *FromRun) ObjectType() baseobject.ObjectType
type FromSectionXMLFile ¶
type FromSectionXMLFile struct {
*comm.ExtractorBase
}
func NewFromSectionXMLFile ¶
func NewFromSectionXMLFile(manager comm.ExtractorManager, parameter comm.ParameterInterface) *FromSectionXMLFile
func (*FromSectionXMLFile) Extract ¶
func (f *FromSectionXMLFile) Extract(from baseobject.HWPXObject)
func (*FromSectionXMLFile) ObjectType ¶
func (f *FromSectionXMLFile) ObjectType() baseobject.ObjectType
type FromSubList ¶
type FromSubList struct {
*comm.ExtractorBase
}
func NewFromSubList ¶
func NewFromSubList(manager comm.ExtractorManager, parameter comm.ParameterInterface) *FromSubList
func (*FromSubList) Extract ¶
func (f *FromSubList) Extract(from baseobject.HWPXObject)
func (*FromSubList) ObjectType ¶
func (f *FromSubList) ObjectType() baseobject.ObjectType
type FromT ¶
type FromT struct {
*comm.ExtractorBase
}
func NewFromT ¶
func NewFromT(manager comm.ExtractorManager, parameter comm.ParameterInterface) *FromT
func (*FromT) Extract ¶
func (f *FromT) Extract(from baseobject.HWPXObject)
func (*FromT) ObjectType ¶
func (f *FromT) ObjectType() baseobject.ObjectType
type FromTable ¶
type FromTable struct {
*comm.ExtractorBase
}
func NewFromTable ¶
func NewFromTable(manager comm.ExtractorManager, parameter comm.ParameterInterface) *FromTable
func (*FromTable) Extract ¶
func (f *FromTable) Extract(from baseobject.HWPXObject)
func (*FromTable) ObjectType ¶
func (f *FromTable) ObjectType() baseobject.ObjectType
type FromTc ¶
type FromTc struct {
*comm.ExtractorBase
}
func NewFromTc ¶
func NewFromTc(manager comm.ExtractorManager, parameter comm.ParameterInterface) *FromTc
func (*FromTc) Extract ¶
func (f *FromTc) Extract(from baseobject.HWPXObject)
func (*FromTc) ObjectType ¶
func (f *FromTc) ObjectType() baseobject.ObjectType
type NoWorkingExtractor ¶
type NoWorkingExtractor struct {
*comm.ExtractorBase
}
func NewNoWorkingExtractor ¶
func NewNoWorkingExtractor(manager comm.ExtractorManager, parameter comm.ParameterInterface) *NoWorkingExtractor
func (*NoWorkingExtractor) Extract ¶
func (e *NoWorkingExtractor) Extract(from baseobject.HWPXObject)
func (*NoWorkingExtractor) ObjectType ¶
func (e *NoWorkingExtractor) ObjectType() baseobject.ObjectType
type ParaHeadMaker ¶
type ParaHeadMaker struct {
// contains filtered or unexported fields
}
func NewParaHeadMaker ¶
func NewParaHeadMaker(hwpxFile *object.HWPXFile) *ParaHeadMaker
func (*ParaHeadMaker) Make ¶
func (m *ParaHeadMaker) Make(para *paragraph.Para, builder *comm.TextBuilder)
func (*ParaHeadMaker) StartSection ¶
func (m *ParaHeadMaker) StartSection()
type ParaNumber ¶
type ParaNumber struct {
// contains filtered or unexported fields
}
func NewParaNumber ¶
func NewParaNumber() *ParaNumber
func (*ParaNumber) ChangedParaHead ¶
func (p *ParaNumber) ChangedParaHead(headID string) bool
func (*ParaNumber) Increase ¶
func (p *ParaNumber) Increase(level int)
func (*ParaNumber) Value ¶
func (p *ParaNumber) Value(level int) int
type Parameter ¶
type Parameter struct {
// contains filtered or unexported fields
}
func NewParameter ¶
func (*Parameter) TextBuilder ¶
func (p *Parameter) TextBuilder() *comm.TextBuilder
func (*Parameter) TextExtractMethod ¶
type TextExtractMethod ¶
type TextExtractMethod int
const (
TextExtractMethod_InsertControlTextBetweenParagraphText TextExtractMethod = iota
TextExtractMethod_AppendControlTextAfterParagraphText
)
type TextMarks ¶
type TextMarks struct {
ParaSeparator string
LineBreak string
Tab string
FieldStart string
FieldEnd string
TableStart string
TableEnd string
TableRowSeparator string
TableCellSeparator string
ContainerStart string
ContainerEnd string
LineStart string
LineEnd string
RectangleStart string
RectangleEnd string
EllipseStart string
EllipseEnd string
ArcStart string
ArcEnd string
PolygonStart string
PolygonEnd string
CurveStart string
CurveEnd string
ConnectLineStart string
ConnectLineEnd string
TextArtStart string
TextArtEnd string
}
func NewTextMarks ¶
func NewTextMarks() *TextMarks
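func (*TextMarks) ArcEndAnd ¶
func (m *TextMarks) ArcEndAnd(v string) *TextMarks
func (*TextMarks) ArcStartAnd ¶
func (m *TextMarks) ArcStartAnd(v string) *TextMarks
func (*TextMarks) ConnectLineEndAnd ¶
func (m *TextMarks) ConnectLineEndAnd(v string) *TextMarks
func (*TextMarks) ConnectLineStartAnd ¶
func (m *TextMarks) ConnectLineStartAnd(v string) *TextMarks
func (*TextMarks) ContainerEndAnd ¶
func (m *TextMarks) ContainerEndAnd(v string) *TextMarks
func (*TextMarks) ContainerStartAnd ¶
func (m *TextMarks) ContainerStartAnd(v string) *TextMarks
func (*TextMarks) CurveEndAnd ¶
func (m *TextMarks) CurveEndAnd(v string) *TextMarks
func (*TextMarks) CurveStartAnd ¶
func (m *TextMarks) CurveStartAnd(v string) *TextMarks
func (*TextMarks) EllipseEndAnd ¶
func (m *TextMarks) EllipseEndAnd(v string) *TextMarks
func (*TextMarks) EllipseStartAnd ¶
func (m *TextMarks) EllipseStartAnd(v string) *TextMarks
func (*TextMarks) FieldEndAnd ¶
func (m *TextMarks) FieldEndAnd(v string) *TextMarks
func (*TextMarks) FieldStartAnd ¶
func (m *TextMarks) FieldStartAnd(v string) *TextMarks
func (*TextMarks) GetArcEnd ¶
func (m *TextMarks) GetArcEnd() string
func (*TextMarks) GetArcStart ¶
func (m *TextMarks) GetArcStart() string
func (*TextMarks) GetConnectLineEnd ¶
func (m *TextMarks) GetConnectLineEnd() string
func (*TextMarks) GetConnectLineStart ¶
func (m *TextMarks) GetConnectLineStart() string
func (*TextMarks) GetContainerEnd ¶
func (m *TextMarks) GetContainerEnd() string
func (*TextMarks) GetContainerStart ¶
func (m *TextMarks) GetContainerStart() string
func (*TextMarks) GetCurveEnd ¶
func (m *TextMarks) GetCurveEnd() string
func (*TextMarks) GetCurveStart ¶
func (m *TextMarks) GetCurveStart() string
func (*TextMarks) GetEllipseEnd ¶
func (m *TextMarks) GetEllipseEnd() string
func (*TextMarks) GetEllipseStart ¶
func (m *TextMarks) GetEllipseStart() string
func (*TextMarks) GetFieldEnd ¶
func (m *TextMarks) GetFieldEnd() string
func (*TextMarks) GetFieldStart ¶
func (m *TextMarks) GetFieldStart() string
func (*TextMarks) GetLineBreak ¶
func (m *TextMarks) GetLineBreak() string
func (*TextMarks) GetLineEnd ¶
func (m *TextMarks) GetLineEnd() string
func (*TextMarks) GetLineStart ¶
func (m *TextMarks) GetLineStart() string
func (*TextMarks) GetParaSeparator ¶
func (m *TextMarks) GetParaSeparator() string
func (*TextMarks) GetPolygonEnd ¶
func (m *TextMarks) GetPolygonEnd() string
func (*TextMarks) GetPolygonStart ¶
func (m *TextMarks) GetPolygonStart() string
func (*TextMarks) GetRectangleEnd ¶
func (m *TextMarks) GetRectangleEnd() string
func (*TextMarks) GetRectangleStart ¶
func (m *TextMarks) GetRectangleStart() string
func (*TextMarks) GetTab ¶
func (m *TextMarks) GetTab() string
func (*TextMarks) GetTableCellSeparator ¶
func (m *TextMarks) GetTableCellSeparator() string
func (*TextMarks) GetTableEnd ¶
func (m *TextMarks) GetTableEnd() string
func (*TextMarks) GetTableRowSeparator ¶
func (m *TextMarks) GetTableRowSeparator() string
func (*TextMarks) GetTableStart ¶
func (m *TextMarks) GetTableStart() string
func (*TextMarks) GetTextArtEnd ¶
func (m *TextMarks) GetTextArtEnd() string
func (*TextMarks) GetTextArtStart ¶
func (m *TextMarks) GetTextArtStart() string
func (*TextMarks) LineBreakAnd ¶
func (m *TextMarks) LineBreakAnd(v string) *TextMarks
func (*TextMarks) LineEndAnd ¶
func (m *TextMarks) LineEndAnd(v string) *TextMarks
func (*TextMarks) LineStartAnd ¶
func (m *TextMarks) LineStartAnd(v string) *TextMarks
func (*TextMarks) ParaSeparatorAnd ¶
func (m *TextMarks) ParaSeparatorAnd(v string) *TextMarks
func (*TextMarks) PolygonEndAnd ¶
func (m *TextMarks) PolygonEndAnd(v string) *TextMarks
func (*TextMarks) PolygonStartAnd ¶
func (m *TextMarks) PolygonStartAnd(v string) *TextMarks
func (*TextMarks) RectangleEndAnd ¶
func (m *TextMarks) RectangleEndAnd(v string) *TextMarks
func (*TextMarks) RectangleStartAnd ¶
func (m *TextMarks) RectangleStartAnd(v string) *TextMarks
func (*TextMarks) TabAnd ¶
func (m *TextMarks) TabAnd(v string) *TextMarks
func (*TextMarks) TableCellSeparatorAnd ¶
func (m *TextMarks) TableCellSeparatorAnd(v string) *TextMarks
func (*TextMarks) TableEndAnd ¶
func (m *TextMarks) TableEndAnd(v string) *TextMarks
func (*TextMarks) TableRowSeparatorAnd ¶
func (m *TextMarks) TableRowSeparatorAnd(v string) *TextMarks
func (*TextMarks) TableStartAnd ¶
func (m *TextMarks) TableStartAnd(v string) *TextMarks
func (*TextMarks) TextArtStartAnd ¶
func (m *TextMarks) TextArtStartAnd(v string) *TextMarks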
Source Files ¶
- ExtractorCreator.go
- FromCaption.go
- FromContainer.go
- FromDrawText.go
- FromDrawingObject.go
- FromFieldBegin.go
- FromFieldEnd.go
- FromHWPXFile.go
- FromParaControl.go
- FromParaWithAppendingControlTextAfterParagraphText.go
- FromParaWithInsertingControlTextBetweenParagraphText.go
- FromRun.go
- FromSectionXMLFile.go
- FromSubList.go
- FromT.go
- FromTable.go
- FromTc.go
- ParaHeadMaker.go
- ParaHeadNumber.go
- ParaNumber.go
- Parameter.go
- TextExtractMethod.go
- TextExtractor.go
- TextMarks.go
Click to show internal directories.
Click to hide internal directories.