txtreader.go 1.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071
  1. package reader
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "io"
  6. "regexp"
  7. "git.wecise.com/wecise/cgimport/schema"
  8. )
  9. type TXTBlockReader struct {
  10. *LineReader
  11. filetype schema.FileType
  12. firstline string
  13. nextline string
  14. }
  15. func NewTXTBlockReader(filename string, filetype schema.FileType, reader io.Reader) *TXTBlockReader {
  16. return &TXTBlockReader{
  17. LineReader: NewLineReader(filename, reader),
  18. filetype: filetype,
  19. }
  20. }
  21. var regrecordstart = regexp.MustCompile(`^(?:[\.\/a-zA-Z0-9_]*:)?V:(\{.*)`)
  22. var regrecordend = regexp.MustCompile(`\}\s*$`)
  23. func (br *TXTBlockReader) ReadBlock() (block map[string]any, line string, linecount int, err error) {
  24. eof := false
  25. line = br.nextline
  26. for {
  27. for {
  28. br.nextline, linecount, eof, err = br.ReadLine()
  29. if err != nil {
  30. return
  31. }
  32. if br.nextline == "" && eof {
  33. if line == "" {
  34. return
  35. }
  36. break
  37. }
  38. if regrecordend.MatchString(line) {
  39. break
  40. }
  41. if regrecordstart.MatchString(br.nextline) {
  42. break
  43. }
  44. line += br.nextline
  45. }
  46. linecount--
  47. if !regrecordstart.MatchString(line) || !regrecordend.MatchString(line) {
  48. if line != "" {
  49. logger.Info(fmt.Sprint("skip non-json line ", br.filename, ":", linecount, " ", line))
  50. }
  51. if linecount == 1 {
  52. br.firstline = line
  53. }
  54. line = br.nextline
  55. continue
  56. }
  57. line = regrecordstart.ReplaceAllString(line, "$1")
  58. block = map[string]any{}
  59. err = json.Unmarshal([]byte(line), &block)
  60. if err != nil {
  61. return
  62. }
  63. return
  64. }
  65. }