2020-05-22 golang 处理 PDF

robin_gu · · 1433 次点击 · · 开始浏览    
这是一篇创建时间较早的文章,其中的信息可能已经有所发展或是发生改变。

func rmWaterMark(inputPath, outputPath string) error {
f, err := os.Open(inputPath)
if err != nil {
return err
}

defer f.Close()
//common.SetLogger(common.ConsoleLogger{LogLevel: 5})
pdfWriter := pdf.NewPdfWriter()
pdfReader, err := pdf.NewPdfReaderLazy(f)
if err != nil {
    return err
}
fileExt := filepath.Ext(f.Name())
fileName := strings.TrimSuffix(filepath.Base(f.Name()), fileExt)
println(fileName)
isEncrypted, err := pdfReader.IsEncrypted()
if err != nil {
    return err
}

if isEncrypted {
    _, err = pdfReader.Decrypt([]byte(""))
    if err != nil {
        return err
    }
}

numPages, err := pdfReader.GetNumPages()
if err != nil {
    return err
}
//pdfWriter := pdf.NewPdfWriter()

for i := 0; i < numPages; i++ {
    pdfPage, _ := pdfReader.GetPage(i + 1)

    fmt.Println("Start prcess page " + strconv.Itoa(i+1))
    if pdfPage.Contents != nil {
        typeOf := reflect.TypeOf(pdfPage.Contents)
        typeOfEl := typeOf.Elem()
        if typeOfEl == reflect.TypeOf(core.PdfObjectArray{}) {
            arrays := pdfPage.Contents.(*core.PdfObjectArray)

            if arrays.Len() > 0 {

                //  fterx := arrays.Get(0).(*core.PdfObjectReference).Resolve().(core.PdfObject).(*core.PdfObjectStream)
                for _, norContent := range arrays.Elements() {
                    fterValue := norContent.(*core.PdfObjectReference).Resolve().(core.PdfObject).(*core.PdfObjectStream)
                    filterTj(fterValue, false)
                }
            }
        }
        if typeOfEl == reflect.TypeOf(core.PdfObjectReference{}) {
            filter := pdfPage.Contents.(*core.PdfObjectReference).Resolve().(*core.PdfObjectStream)

            filterTj(filter, false)

        }

    }
    pdfWriter.AddPage(pdfPage)
}
outFull := outputPath + "rmwaterMarker" + fileExt
fmt.Println(outFull)
fWrite, err := os.Create(outFull)
if err != nil {
    return err
}

defer fWrite.Close()

err = pdfWriter.Write(fWrite)

return nil

}

// filterTj rewrites the content stream in place, blanking the operations this
// tool treats as watermark drawing.
//
// The stream is decoded, parsed into operations and scanned in order:
//   - a "Tm" whose last two operands are floats inside the hard-coded region
//     (or accepted by validatekaKa) is blanked and starts a removal run;
//   - a "cm" whose last two operands are floats accepted by validatekother is
//     blanked and starts a removal run;
//   - any other "Tm" or "cm" that reaches the scan ends the removal run;
//   - while a removal run is active, "l", "c", "h", "re", "TJ" and "Tj"
//     operations are blanked too.
//
// Blanking replaces the operation with an empty ContentStreamOperation. If
// anything was blanked, the operations are serialized, Flate-encoded and stored
// back into content.Stream. The left parameter is not used.
//
// The function has no error return, so decode/parse/encode failures are
// printed and the stream is left untouched. The decoded stream and every
// removed operation are printed as debugging output.
func filterTj(content *core.PdfObjectStream, left bool) {
	decoded, err := core.DecodeStream(content)
	if err != nil {
		fmt.Println("filterTj: cannot decode content stream:", err)
		return
	}
	fmt.Println(string(decoded))
	fmt.Println("proc==================================")

	parsed, err := contentstream.NewContentStreamParser(string(decoded)).Parse()
	if err != nil || parsed == nil {
		fmt.Println("filterTj: cannot parse content stream:", err)
		return
	}

	// Operators that are never inspected. The test is a substring match, so an
	// empty operand (such as an already blanked operation) is skipped as well.
	const skipped = "BDC k ri gs f EMC q W n  Q BT Tf ET K w d S G Do Td"

	needreset := false // something was blanked; the stream must be re-encoded
	startrm := false   // inside a removal run started by a matching Tm/cm
	alltype := ""      // distinct operators seen, printed for debugging
	for id, op := range *parsed {
		if !strings.Contains(alltype, op.Operand) {
			alltype = alltype + " " + op.Operand
		}
		if op.Params == nil || strings.Contains(skipped, op.Operand) {
			continue
		}

		remove := false
		switch op.Operand {
		case "Tm": // text
			startrm = false
			if last, prev, ok := trailingFloats(op.Params); ok {
				inRegion := *last < 16 && *prev > 250 && *prev < 608
				if inRegion || validatekaKa(last, prev) {
					startrm = true
					remove = true
				}
			}
		case "cm": // circles
			startrm = false
			if last, prev, ok := trailingFloats(op.Params); ok && validatekother(last, prev) {
				startrm = true
				remove = true
			}
		case "l", "c", "h", "re", "TJ", "Tj":
			// Path and text-showing operations belong to whatever the
			// preceding Tm/cm positioned, so they share its fate.
			remove = startrm
		}

		if remove {
			(*parsed)[id] = &contentstream.ContentStreamOperation{}
			needreset = true
			fmt.Println("=============")
			fmt.Println(op)
		}
	}
	fmt.Println(alltype)

	if !needreset {
		return
	}
	rewritten := parsed.Bytes()
	fmt.Println(string(rewritten))
	encoded, err := core.NewFlateEncoder().EncodeBytes(rewritten)
	if err != nil {
		// Keep the original bytes rather than storing a broken stream.
		fmt.Println("filterTj: cannot encode content stream:", err)
		return
	}
	// NOTE(review): only the stream bytes are replaced; the stream dictionary
	// (Filter/Length) is not updated here — confirm the input always uses
	// FlateDecode or that the writer fixes these entries up.
	content.Stream = encoded
}

// trailingFloats returns the last and second-to-last operands of params when
// both are *core.PdfObjectFloat. ok is false when there are fewer than two
// operands or either one has another type (integer operands do not match).
func trailingFloats(params []core.PdfObject) (last, prev *core.PdfObjectFloat, ok bool) {
	if len(params) < 2 {
		return nil, nil, false
	}
	floatPtr := reflect.TypeOf((*core.PdfObjectFloat)(nil))
	lastObj, prevObj := params[len(params)-1], params[len(params)-2]
	if reflect.TypeOf(lastObj) != floatPtr || reflect.TypeOf(prevObj) != floatPtr {
		return nil, nil, false
	}
	return lastObj.(*core.PdfObjectFloat), prevObj.(*core.PdfObjectFloat), true
}

// validatekaKa reports whether a Tm operation should be removed in addition to
// the fixed region tested in filterTj: it is true when *y is below 23.5 and *x
// is above 12. filterTj passes the last Tm operand as y and the one before it
// as x.
//
// Original author's note (translated): a Tm such as
// "6 0 0 6 24.5576019(x) 30.1304016(y) Tm" — x coordinate below 12, y
// coordinate above 30 — is left alone.
// NOTE(review): the thresholds in that note do not match the ones in the
// code; confirm which is intended.
func validatekaKa(y, x *core.PdfObjectFloat) bool {
	return *y < 23.5 && *x > 12
}
// validatekother reports whether a cm operation should be removed. It is true
// when *x lies strictly between 568 and 608 and *y is either strictly between
// 370 and 378.5 or below 18. filterTj passes the last cm operand as y and the
// one before it as x.
//
// The parameters are pointers to match the call site in filterTj; the body
// previously mixed value and pointer syntax and did not compile.
func validatekother(y, x *core.PdfObjectFloat) bool {
	yInBand := (*y < 378.5 && *y > 370) || *y < 18
	xInBand := *x > 568 && *x < 608
	return yInBand && xInBand
}

// splitPdf splits the PDF at inputPath into splitfiles consecutive parts.
//
// Each part holds ceil(numPages / splitfiles) pages, except that later parts
// get whatever pages remain. Part i is written to
// outputPath + <input base name> + i + <input extension>; outputPath is used as
// a plain string prefix, so it must end with a path separator when it names a
// directory. Encrypted inputs are opened with the empty password.
//
// NOTE(review): when splitfiles does not divide the page count evenly, trailing
// parts can receive no pages at all; a file is still written for them.
//
// It returns an error when splitfiles is not positive, or the first error met
// while reading the input or writing a part.
func splitPdf(inputPath string, outputPath string, splitfiles int) error {
	if splitfiles <= 0 {
		// Dividing by zero below would yield Inf/NaN, and the loop would
		// silently write nothing.
		return fmt.Errorf("splitfiles must be positive, got %d", splitfiles)
	}

	f, err := os.Open(inputPath)
	if err != nil {
		return err
	}
	defer f.Close()

	pdfReader, err := pdf.NewPdfReaderLazy(f)
	if err != nil {
		return err
	}

	fileExt := filepath.Ext(f.Name())
	fileName := strings.TrimSuffix(filepath.Base(f.Name()), fileExt)
	println(fileName)

	isEncrypted, err := pdfReader.IsEncrypted()
	if err != nil {
		return err
	}
	if isEncrypted {
		auth, err := pdfReader.Decrypt([]byte(""))
		if err != nil {
			return err
		}
		if !auth {
			// Same policy as mergePdf: refuse documents the empty password
			// cannot unlock.
			return fmt.Errorf("cannot split %s: document is password protected", inputPath)
		}
	}

	numPages, err := pdfReader.GetNumPages()
	if err != nil {
		return err
	}

	prefilePages := int(math.Ceil(float64(numPages) / float64(splitfiles)))
	println(strconv.Itoa(numPages) + " " + strconv.Itoa(prefilePages))

	for i := 0; i < splitfiles; i++ {
		pdfWriter := pdf.NewPdfWriter()

		// y is the 0-based page index; this part covers
		// [i*prefilePages, (i+1)*prefilePages), clipped to the page count.
		for y := i * prefilePages; y < numPages && y < (i+1)*prefilePages; y++ {
			pageNum := y + 1
			println(pageNum)
			page, err := pdfReader.GetPage(pageNum)
			if err != nil {
				return err
			}
			if err := pdfWriter.AddPage(page); err != nil {
				return err
			}
		}

		outFile := outputPath + fileName + strconv.Itoa(i) + fileExt
		println(outFile)
		fWrite, err := os.Create(outFile)
		if err != nil {
			return err
		}

		// Close inside the loop (not deferred) so at most one output file is
		// open at a time; a Close failure can mean data was not flushed.
		writeErr := pdfWriter.Write(fWrite)
		closeErr := fWrite.Close()
		if writeErr != nil {
			return writeErr
		}
		if closeErr != nil {
			return closeErr
		}
	}
	return nil
}

// mergePdf concatenates every regular file directly inside inputFolder into a
// single PDF, in the order ioutil.ReadDir lists them.
//
// The result is written to outputPath + "merged" + <extension of the last
// input file>; outputPath is used as a plain string prefix, so it must end
// with a path separator when it names a directory. Files are not filtered by
// extension, so the folder should contain only PDFs. Encrypted inputs are
// opened with the empty password and rejected if that does not unlock them.
//
// It returns an error when the folder cannot be listed or holds no files, or
// the first error met while reading an input or writing the output.
func mergePdf(inputFolder string, outputPath string) error {
	allFiles, err := ioutil.ReadDir(inputFolder)
	if err != nil {
		return err
	}

	var inputPaths []string
	var fileExt string
	for _, file := range allFiles {
		if file.IsDir() {
			continue
		}
		// Join works whether or not inputFolder ends with a separator.
		fileFullPath := filepath.Join(inputFolder, file.Name())
		fileExt = filepath.Ext(fileFullPath)
		inputPaths = append(inputPaths, fileFullPath)
		println(fileFullPath)
	}
	if len(inputPaths) == 0 {
		return errors.New("no input files to merge")
	}

	pdfWriter := pdf.NewPdfWriter()

	for _, inputPath := range inputPaths {
		f, err := os.Open(inputPath)
		if err != nil {
			return err
		}
		// Deliberately deferred to function exit rather than closed per
		// iteration. NOTE(review): presumably the writer may still read page
		// data from these files during Write — confirm before closing earlier.
		defer f.Close()

		pdfReader, err := pdf.NewPdfReader(f)
		if err != nil {
			return err
		}

		isEncrypted, err := pdfReader.IsEncrypted()
		if err != nil {
			return err
		}
		if isEncrypted {
			auth, err := pdfReader.Decrypt([]byte(""))
			if err != nil {
				return err
			}
			if !auth {
				return errors.New("Cannot merge encrypted, password protected document")
			}
		}

		numPages, err := pdfReader.GetNumPages()
		if err != nil {
			return err
		}

		// Page numbers are 1-based.
		for pageNum := 1; pageNum <= numPages; pageNum++ {
			page, err := pdfReader.GetPage(pageNum)
			if err != nil {
				return err
			}
			if err := pdfWriter.AddPage(page); err != nil {
				return err
			}
		}
	}

	fWrite, err := os.Create(outputPath + "merged" + fileExt)
	if err != nil {
		return err
	}
	defer fWrite.Close()

	return pdfWriter.Write(fWrite)
}


有疑问加站长微信联系(非本文作者)

本文来自:简书

感谢作者:robin_gu

查看原文:2020-05-22 golang 处理pdf、

入群交流(和以上内容无关):加入Go大咖交流群,或添加微信:liuxiaoyan-s 备注:入群;或加QQ群:692541889

1433 次点击  
加入收藏 微博
暂无回复
添加一条新回复 (您需要 登录 后才能回复 没有账号 ?)
  • 请尽量让自己的回复能够对别人有帮助
  • 支持 Markdown 格式, **粗体**、~~删除线~~、`单行代码`
  • 支持 @ 本站用户;支持表情(输入 : 提示),见 Emoji cheat sheet
  • 图片支持拖拽、截图粘贴等方式上传