txtreader.go 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172
  1. package reader
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "io"
  6. "regexp"
  7. "git.wecise.com/wecise/cgimport/schema"
  8. "git.wecise.com/wecise/util/logger"
  9. )
  10. type TXTBlockReader struct {
  11. *LineReader
  12. filetype schema.FileType
  13. firstline string
  14. nextline string
  15. }
  16. func NewTXTBlockReader(filename string, filetype schema.FileType, reader io.Reader) *TXTBlockReader {
  17. return &TXTBlockReader{
  18. LineReader: NewLineReader(filename, reader),
  19. filetype: filetype,
  20. }
  21. }
  22. var regrecordstart = regexp.MustCompile(`^(?:[\.\/a-zA-Z0-9_]*:)?V:(\{.*)`)
  23. var regrecordend = regexp.MustCompile(`\}\s*$`)
  24. func (br *TXTBlockReader) ReadBlock() (block map[string]any, line string, linecount int, err error) {
  25. eof := false
  26. line = br.nextline
  27. for {
  28. for {
  29. br.nextline, linecount, eof, err = br.ReadLine()
  30. if err != nil {
  31. return
  32. }
  33. if br.nextline == "" && eof {
  34. if line == "" {
  35. return
  36. }
  37. break
  38. }
  39. if regrecordend.MatchString(line) {
  40. break
  41. }
  42. if regrecordstart.MatchString(br.nextline) {
  43. break
  44. }
  45. line += br.nextline
  46. }
  47. linecount--
  48. if !regrecordstart.MatchString(line) || !regrecordend.MatchString(line) {
  49. if line != "" {
  50. logger.Info(fmt.Sprint("skip non-json line ", br.filename, ":", linecount, " ", line))
  51. }
  52. if linecount == 1 {
  53. br.firstline = line
  54. }
  55. line = br.nextline
  56. continue
  57. }
  58. line = regrecordstart.ReplaceAllString(line, "$1")
  59. block = map[string]any{}
  60. err = json.Unmarshal([]byte(line), &block)
  61. if err != nil {
  62. return
  63. }
  64. return
  65. }
  66. }