libf 3 місяців тому
батько
коміт
170b3f9e83

+ 14 - 10
cgf/cgf.go

@@ -11,6 +11,7 @@ import (
 
 	"git.wecise.com/wecise/cgimport/cgf/reader"
 	"git.wecise.com/wecise/util/filewalker"
+	"git.wecise.com/wecise/util/merrs"
 	"git.wecise.com/wecise/util/rc"
 )
 
@@ -54,35 +55,38 @@ func ImportDir(datapath string, parallel int) (filescount, recordscount int64, e
+// ImportFile opens the file at filepath and hands it to importReader,
+// returning whatever block count and error importReader reports.
+// If the file cannot be opened, the os.Open error is passed through
+// merrs.NewError together with the file name.
 func ImportFile(filepath string) (blockcount int, err error) {
 	f, e := os.Open(filepath)
 	if e != nil {
-		return blockcount, e
+		return blockcount, merrs.NewError(e, merrs.SSMaps{{"filename": filepath}})
 	}
 	defer f.Close()
-	return importReader(f)
+	return importReader(filepath, f)
 }
 
-func importReader(buf io.Reader) (blockcount int, err error) {
-	br := reader.NewBlockReader(buf)
+// importReader reads blocks from buf one at a time and passes each one to
+// importBlock. filename is given to reader.NewBlockReader and is attached to
+// every error returned from here. The result is the number of blocks that were
+// imported before the input ended or the first error occurred.
+func importReader(filename string, buf io.Reader) (blockcount int, err error) {
+	br, e := reader.NewBlockReader(filename, buf)
+	if e != nil {
+		return blockcount, merrs.NewError(e, merrs.SSMaps{{"filename": filename}})
+	}
 	for {
-		block, e := br.ReadBlock()
+		block, linecount, e := br.ReadBlock()
 		if e != nil {
-			return blockcount, e
+			return blockcount, merrs.NewError(e, merrs.SSMaps{{"filename": filename}, {"line": fmt.Sprint(linecount)}})
 		}
 		if block == nil {
+			// A nil block without an error ends the loop; err has not been assigned, so it is nil.
 			return
 		}
-		e = importBlock(block)
+		e = importBlock(block, filename, linecount)
 		if e != nil {
-			return blockcount, e
+			return blockcount, merrs.NewError(e, merrs.SSMaps{{"filename": filename}, {"line": fmt.Sprint(linecount)}})
 		}
 		blockcount++
 	}
 }
 
-func importBlock(block map[string]any) (err error) {
+// importBlock renders block as indented JSON and prints it to standard output,
+// prefixed with "import ", the file name and the line count in brackets.
+// It returns the json.MarshalIndent error, if any, and nil otherwise.
+func importBlock(block map[string]any, filename string, linecount int) (err error) {
 	bs, e := json.MarshalIndent(block, "", "  ")
 	if e != nil {
 		return e
 	}
-	fmt.Println("import:", string(bs))
+	fmt.Println(fmt.Sprint("import ", filename, "[", linecount, "]:", string(bs)))
 	return
 }

+ 39 - 0
cgf/reader/base_linereader.go

@@ -0,0 +1,39 @@
+package reader
+
+import (
+	"bufio"
+	"io"
+)
+
+// LineReader reads whole lines from a buffered reader and counts them.
+type LineReader struct {
+	// filename is the name passed to NewLineReader.
+	filename  string
+	// reader is the buffered reader that ReadLine pulls data from.
+	reader    *bufio.Reader
+	// linecount is incremented by every ReadLine call that does not return an
+	// error, including the call that reports end of input.
+	linecount int
+}
+
+// NewLineReader returns a LineReader that remembers filename and reads from
+// reader through a bufio.Reader. Its line counter starts at zero.
+func NewLineReader(filename string, reader io.Reader) *LineReader {
+	lr := new(LineReader)
+	lr.filename = filename
+	lr.reader = bufio.NewReader(reader)
+	return lr
+}
+
+// ReadLine returns the next complete line, joining the fragments that
+// bufio.Reader.ReadLine hands back when a line does not fit its buffer.
+//
+// At end of input eof is true and err is nil. Any other read error is returned
+// as err without touching the line counter. linecount is the reader's counter
+// after this call; it is also advanced by the call that reports eof.
+func (br *LineReader) ReadLine() (line string, linecount int, eof bool, err error) {
+	for more := true; more; {
+		var chunk []byte
+		chunk, more, err = br.reader.ReadLine()
+		if err == io.EOF {
+			// End of input is reported through eof, not as an error.
+			err, eof = nil, true
+			break
+		}
+		if err != nil {
+			return
+		}
+		line += string(chunk)
+	}
+	br.linecount++
+	linecount = br.linecount
+	return
+}

+ 23 - 0
cgf/reader/blockreader.go

@@ -0,0 +1,23 @@
+package reader
+
+import (
+	"io"
+	"path/filepath"
+
+	"git.wecise.com/wecise/util/merrs"
+)
+
+// BlockReader yields one record ("block") per ReadBlock call.
+type BlockReader interface {
+	// ReadBlock returns the next block as a key/value map together with a line
+	// count and an error. The readers in this package return a nil block with
+	// a nil error once the input is exhausted.
+	ReadBlock() (block map[string]any, linecount int, err error)
+}
+
+// NewBlockReader picks a BlockReader implementation from the extension of
+// filename: ".csv" yields a CSVBlockReader and ".txt" a TXTBlockReader, both
+// reading from reader. The comparison is case-sensitive. Any other extension
+// produces a merrs.UnsupportedError naming the extension.
+func NewBlockReader(filename string, reader io.Reader) (BlockReader, error) {
+	switch ext := filepath.Ext(filename); ext {
+	case ".csv":
+		return NewCSVBlockReader(filename, reader), nil
+	case ".txt":
+		return NewTXTBlockReader(filename, reader), nil
+	default:
+		return nil, merrs.UnsupportedError.NewError("unsupported data format " + ext)
+	}
+}

+ 54 - 0
cgf/reader/csvreader.go

@@ -0,0 +1,54 @@
+package reader
+
+import (
+	"fmt"
+	"io"
+	"strings"
+
+	"git.wecise.com/wecise/util/merrs"
+)
+
+// CSVBlockReader turns "^"-separated lines into blocks, one block per data line.
+type CSVBlockReader struct {
+	*LineReader
+	// csvkeys holds the column names, split from the line whose line count is 1.
+	csvkeys []string
+}
+
+// NewCSVBlockReader returns a CSVBlockReader that reads lines from reader via
+// a fresh LineReader. The column keys are left unset until ReadBlock is called.
+func NewCSVBlockReader(filename string, reader io.Reader) *CSVBlockReader {
+	csv := new(CSVBlockReader)
+	csv.LineReader = NewLineReader(filename, reader)
+	return csv
+}
+
+// ReadBlock returns the next data line as a map from column key to value.
+//
+// The line whose line count is 1 is split on "^" and stored as the column
+// keys; the following line is then read as the first data line. Empty lines
+// are skipped. At end of input a nil block and a nil error are returned. A
+// data line whose field count differs from the key count yields an error.
+func (br *CSVBlockReader) ReadBlock() (block map[string]any, linecount int, err error) {
+	for {
+		var (
+			row   string
+			atEOF bool
+		)
+		if row, linecount, atEOF, err = br.ReadLine(); err != nil {
+			return
+		}
+		if linecount == 1 {
+			// First line is the header: remember the keys and move on to the next line.
+			br.csvkeys = strings.Split(row, "^")
+			if row, linecount, atEOF, err = br.ReadLine(); err != nil {
+				return
+			}
+		}
+		if row == "" {
+			if atEOF {
+				return
+			}
+			continue
+		}
+		fields := strings.Split(row, "^")
+		if len(fields) != len(br.csvkeys) {
+			err = merrs.NewError(fmt.Sprint(br.filename, " format error, values count not match keys count, line ", br.linecount))
+			return
+		}
+		block = make(map[string]any, len(br.csvkeys))
+		for i := range fields {
+			block[br.csvkeys[i]] = fields[i]
+		}
+		return
+	}
+}

+ 0 - 34
cgf/reader/reader.go

@@ -1,34 +0,0 @@
-package reader
-
-import (
-	"bufio"
-	"io"
-)
-
-type BlockReader struct {
-	reader *bufio.Reader
-}
-
-func NewBlockReader(reader io.Reader) *BlockReader {
-	return &BlockReader{
-		reader: bufio.NewReader(reader),
-	}
-}
-
-func (br *BlockReader) ReadBlock() (block map[string]any, err error) {
-	var str string
-	var line []byte
-	var isprefix bool = true
-	for isprefix {
-		line, isprefix, err = br.reader.ReadLine()
-		if err != nil {
-			if err == io.EOF {
-				err = nil
-			}
-			return nil, err
-		}
-		str += string(line)
-	}
-	block = map[string]any{"line": str}
-	return
-}

+ 55 - 0
cgf/reader/txtreader.go

@@ -0,0 +1,55 @@
+package reader
+
+import (
+	"encoding/json"
+	"io"
+	"regexp"
+)
+
+// TXTBlockReader assembles records from text lines and decodes each record's
+// JSON payload into a block.
+type TXTBlockReader struct {
+	*LineReader
+	// nextline is the most recent line read by ReadBlock; the following
+	// ReadBlock call starts its record from it.
+	nextline string
+}
+
+// NewTXTBlockReader returns a TXTBlockReader that reads lines from reader via
+// a fresh LineReader, with no look-ahead line buffered yet.
+func NewTXTBlockReader(filename string, reader io.Reader) *TXTBlockReader {
+	txt := new(TXTBlockReader)
+	txt.LineReader = NewLineReader(filename, reader)
+	return txt
+}
+
+// regrecord matches the first line of a record: an optional prefix made of
+// '.', '/', letters, digits or '_' followed by ':', then the literal "V:" and
+// an opening '{'. The capture group holds everything from that '{' onwards.
+var regrecord = regexp.MustCompile(`^(?:[\.\/a-zA-Z0-9_]*:)?V:(\{.*)`)
+
+// ReadBlock returns the next record decoded from JSON.
+//
+// A record starts on a line matching regrecord and runs until the next such
+// line or the end of input; the lines in between are appended to it without a
+// separator. The line that ends a record is kept in br.nextline for the next
+// call. Accumulated text that does not start with a regrecord match is
+// dropped. At end of input with nothing pending, a nil block and a nil error
+// are returned. For a decoded record, linecount is the count of the last line
+// read minus one.
+func (br *TXTBlockReader) ReadBlock() (block map[string]any, linecount int, err error) {
+	var atEOF bool
+	record := br.nextline
+	for {
+		// Pull lines until the next record header or the end of input.
+		for {
+			br.nextline, linecount, atEOF, err = br.ReadLine()
+			if err != nil {
+				return
+			}
+			if atEOF && br.nextline == "" {
+				if record == "" {
+					return
+				}
+				break
+			}
+			if regrecord.MatchString(br.nextline) {
+				break
+			}
+			record += br.nextline
+		}
+		// The line just read belongs to the next record, so step back by one.
+		linecount--
+		parts := regrecord.FindStringSubmatch(record)
+		if parts == nil {
+			record = br.nextline
+			continue
+		}
+		block = map[string]any{}
+		err = json.Unmarshal([]byte(parts[1]), &block)
+		return
+	}
+}