importer.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381
  1. package importer
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "io"
  6. "os"
  7. "path/filepath"
  8. "regexp"
  9. "strings"
  10. "sync"
  11. "sync/atomic"
  12. "time"
  13. "git.wecise.com/wecise/cgimport/graph"
  14. "git.wecise.com/wecise/cgimport/odbc"
  15. "git.wecise.com/wecise/cgimport/reader"
  16. "git.wecise.com/wecise/cgimport/schema"
  17. "github.com/wecisecode/util/filewalker"
  18. "github.com/wecisecode/util/merrs"
  19. "github.com/wecisecode/util/rc"
  20. )
  21. var mcfg = odbc.Config
  22. var logger = odbc.Logger
// Importer drives a bulk import of data files from a directory into the
// backend via ODBCImporter, with support for resuming a previous run from
// persisted per-file progress.
type Importer struct {
	datapath string // root directory holding the data files to import
	parallel int    // concurrency passed to the file-import routines controller
	rebuild  bool   // when true, existing classes are dropped (odbcimporter.rebuild) before importing
	reload   bool   // when true, do not resume from saved import status

	importstatus *CGIStatus             // per-file lines/records progress, persisted for resume
	fileimportrc *rc.RoutinesController // bounds concurrent file imports (sized by parallel)
	odbcqueryrc  *rc.RoutinesController // bounds concurrent record inserts (sized by odbc.concurrent.limit)
	odbcimporter *ODBCImporter          // backend that performs schema revision and inserts

	starttime        time.Time // logical overall start, shifted back by previously accumulated use time
	currentstarttime time.Time // start of the current run (basis for usetime)
}
  35. func ImportDir(datapath string, parallel int, rebuild, reload bool) (totalfilescount, totallinecount, totalrecordscount int64, totalusetime time.Duration, filescount, linescount, recordscount int64, usetime time.Duration, err error) {
  36. concurlimt := mcfg.GetInt("odbc.concurrent.limit", 100)
  37. importer := &Importer{
  38. datapath: datapath,
  39. parallel: parallel,
  40. rebuild: rebuild,
  41. reload: reload,
  42. importstatus: NewCGIStatus(),
  43. fileimportrc: rc.NewRoutinesController("", parallel),
  44. odbcqueryrc: rc.NewRoutinesControllerLimit("", concurlimt, concurlimt*2),
  45. odbcimporter: NewODBCImporter(),
  46. }
  47. return importer.Import()
  48. }
  49. func (importer *Importer) Import() (totalfilescount, totallinecount, totalrecordscount int64, totalusetime time.Duration, filescount, linescount, recordscount int64, usetime time.Duration, err error) {
  50. if odbc.DevPhase&odbc.DP_PROCESSCONTINUE != 0 && !importer.reload {
  51. // reload
  52. err = importer.importstatus.Load()
  53. if err != nil {
  54. return
  55. }
  56. err = importer.odbcimporter.init()
  57. if err != nil {
  58. return
  59. }
  60. }
  61. if importer.rebuild {
  62. // rebuild
  63. // 清除已有类
  64. err = importer.odbcimporter.rebuild()
  65. if err != nil {
  66. return
  67. }
  68. }
  69. // 建类
  70. err = importer.odbcimporter.ReviseClassStruct()
  71. if err != nil {
  72. return
  73. }
  74. totalusetime = importer.importstatus.TotalUseTime
  75. importer.starttime = time.Now().Add(-totalusetime)
  76. importer.currentstarttime = time.Now()
  77. reedgefile := regexp.MustCompile("(?i).*edge.*.csv")
  78. efc, elc, erc, ut, e := importer.ImportEdgeFiles(reedgefile, false)
  79. if e != nil {
  80. err = e
  81. return
  82. }
  83. afc, alc, arc, ut, e := importer.ImportNonEdgeFiles(reedgefile, true)
  84. if e != nil {
  85. err = e
  86. return
  87. }
  88. totalfilescount = int64(len(importer.importstatus.ImportStatus)) + efc
  89. for _, v := range importer.importstatus.ImportStatus {
  90. totallinecount += v.LinesCount
  91. totalrecordscount += v.RecordsCount
  92. }
  93. totallinecount += elc
  94. totalrecordscount += erc
  95. filescount = afc + efc
  96. linescount = alc + elc
  97. recordscount = arc + erc
  98. usetime = ut
  99. totalusetime = importer.importstatus.TotalUseTime
  100. importer.importstatus.WaitSaveDone()
  101. importer.alldone()
  102. return
  103. }
  104. func (importer *Importer) ImportEdgeFiles(reedgefile *regexp.Regexp, logstatus bool) (filescount, linecount, recordscount int64, usetime time.Duration, err error) {
  105. return importer.ImportFiles(func(basedir string, fpath string) FWOP {
  106. if !reedgefile.MatchString(filepath.Base(fpath)) {
  107. // 忽略非EDGE文件
  108. return FWOP_IGNORE
  109. }
  110. return FWOP_CONTINUE
  111. }, logstatus)
  112. }
  113. func (importer *Importer) ImportNonEdgeFiles(reedgefile *regexp.Regexp, logstatus bool) (filescount, linecount, recordscount int64, usetime time.Duration, err error) {
  114. return importer.ImportFiles(func(basedir string, fpath string) FWOP {
  115. if reedgefile.MatchString(filepath.Base(fpath)) {
  116. // 忽略EDGE文件
  117. return FWOP_IGNORE
  118. }
  119. return FWOP_CONTINUE
  120. }, logstatus)
  121. }
// FWOP is the action a file-walk callback tells ImportFiles to take for
// the current file.
type FWOP int

const (
	FWOP_IGNORE   FWOP = iota + 1 // skip this file, keep walking
	FWOP_BREAK                    // stop the walk entirely
	FWOP_CONTINUE                 // import this file
)
// ImportFiles walks the top level of importer.datapath and imports every
// file the fwop callback selects, running file imports concurrently
// through importer.fileimportrc. It returns counters for the files
// processed by this call and the elapsed time of the current run.
func (importer *Importer) ImportFiles(fwop func(basedir string, fpath string) FWOP, logstatus bool) (filescount, linescount, recordscount int64, usetime time.Duration, err error) {
	// Walk the data directory.
	var wg sync.WaitGroup
	fw, e := filewalker.NewFileWalker([]string{importer.datapath}, ".*")
	if e != nil {
		err = e
		return
	}
	e = fw.List(func(basedir string, fpath string) bool {
		if err != nil {
			// An earlier file failed; stop the walk.
			return false
		}
		if strings.Contains(fpath, string(filepath.Separator)) {
			// Skip subdirectories. fw.List is ordered with directories after
			// files, so the first subdirectory entry ends the walk.
			return false
		}
		switch fwop(basedir, fpath) {
		case FWOP_IGNORE:
			// Skip this file, continue with the next one.
			return true
		case FWOP_BREAK:
			// Stop the walk.
			return false
		case FWOP_CONTINUE:
		default:
		}
		// Import the current file.
		filename := filepath.Join(basedir, fpath)
		filescount++
		wg.Add(1)
		// Concurrent per-file import, bounded by fileimportrc.
		// NOTE(review): the error returned by ConcurCall is ignored here; if
		// ConcurCall can fail without invoking the callback, wg.Done is never
		// called and wg.Wait below blocks forever — confirm its semantics.
		importer.fileimportrc.ConcurCall(1,
			func() {
				defer wg.Done()
				// Resume position for this file from a previous run, if any.
				importer.importstatus.mutex.RLock()
				importstatus := importer.importstatus.ImportStatus[filename]
				importer.importstatus.mutex.RUnlock()
				linefrom, blockfrom := int64(0), int64(0)
				if importstatus != nil {
					linefrom, blockfrom = importstatus.LinesCount, importstatus.RecordsCount
				}
				if linefrom == 0 {
					logger.Info("import", "file", filename)
				} else {
					logger.Info("import", "file", filename, "from line", linefrom)
				}
				lines, records, e := importer.ImportFile(filename, linefrom, blockfrom, logstatus)
				if e != nil {
					// NOTE(review): err and usetime are written from multiple
					// goroutines without synchronization (data race); only the
					// count updates below use atomics.
					err = e
					return
				}
				// Only count the lines/records newly processed in this run.
				atomic.AddInt64(&linescount, lines-linefrom)
				atomic.AddInt64(&recordscount, records-blockfrom)
				usetime = time.Since(importer.currentstarttime)
				importer.importstatus.mutex.Lock()
				if logstatus {
					importer.importstatus.ImportStatus[filename] = &ImportStatus{
						LinesCount:   lines,
						RecordsCount: records}
				}
				importer.importstatus.TotalUseTime = time.Since(importer.starttime)
				importer.importstatus.mutex.Unlock()
				importer.importstatus.Save()
				logger.Info("file", filename, "imported", records, "records")
			},
		)
		return true
	})
	wg.Wait()
	if e != nil {
		if os.IsNotExist(e) {
			err = merrs.NewError(`directory "`+importer.datapath+`" not exist specified by "datapath"`, e)
		} else {
			err = merrs.NewError(e)
		}
		return
	}
	return
}
  208. func (importer *Importer) ImportFile(filepath string, linefrom, blockfrom int64, logstatus bool) (linecount, blockcount int64, err error) {
  209. f, e := os.Open(filepath)
  210. if e != nil {
  211. return linecount, blockcount, merrs.NewError(e, merrs.SSMaps{{"filename": filepath}})
  212. }
  213. defer f.Close()
  214. return importer.importReader(filepath, f, linefrom, blockfrom, logstatus)
  215. }
// importReader reads the stream block-by-block and imports each record
// concurrently through importer.odbcqueryrc. The schema file type is
// derived from markers in the file name; linefrom/blockfrom give the
// resume position; when logstatus is true, per-file progress is persisted
// as blocks complete.
func (importer *Importer) importReader(filename string, buf io.Reader, linefrom, blockfrom int64, logstatus bool) (linecount, blockcount int64, err error) {
	// Map file-name markers to the schema file type.
	var filetype schema.FileType
	switch {
	case strings.Contains(filename, "_L1_"):
		filetype = schema.FT_LEVEL1
	case strings.Contains(filename, "_L2_"):
		filetype = schema.FT_LEVEL2
	case strings.Contains(filename, "_L3_"):
		filetype = schema.FT_LEVEL3
	case strings.Contains(filename, "_L4_"):
		filetype = schema.FT_LEVEL4
	case strings.Contains(filename, "_L5_"):
		filetype = schema.FT_LEVEL5
	case strings.Contains(filename, "_L6_"):
		filetype = schema.FT_LEVEL6
	case strings.Contains(filename, "_L7_"):
		filetype = schema.FT_LEVEL7
	case strings.Contains(filename, "_L8_"):
		filetype = schema.FT_LEVEL8
	case strings.Contains(filename, "MASTER"):
		filetype = schema.FT_MASTER
	case strings.Contains(filename, "EDGE"):
		filetype = schema.FT_EDGE
	default:
		err = merrs.NewError("filename does not conform to the agreed format " + filename)
		return
	}
	br, e := reader.NewBlockReader(filename, filetype, buf)
	if e != nil {
		return linecount, blockcount, merrs.NewError(e, merrs.SSMaps{{"filename": filename}})
	}
	lastlogtime := time.Now()
	skiplines := int(linefrom)
	blockcount = blockfrom
	// doinglines tracks the start lines of in-flight blocks; progress is
	// persisted only when the oldest outstanding block completes, so the
	// saved resume point never skips unfinished work.
	doinglines := []int64{}
	var wg sync.WaitGroup
	defer importer.done()
	defer wg.Wait()
	for {
		if err != nil {
			break
		}
		block, line, linenumber, e := br.ReadBlock(skiplines)
		linecount = int64(linenumber)
		if e != nil {
			return linecount, blockcount, merrs.NewError(e, merrs.SSMaps{{"filename": filename}, {"linecount": fmt.Sprint(linecount)}, {"line": line}})
		}
		if block == nil {
			// End of stream.
			return
		}
		blockcount++
		wg.Add(1)
		doingline := linecount
		doingblock := blockcount
		if logstatus {
			doinglines = append(doinglines, doingline)
		}
		// NOTE(review): `e` is reused both as the ConcurCall result and as
		// the callback's record-import error, and the callback may run
		// concurrently with the check below — possible data race on e/err.
		// doinglines is also mutated by multiple callbacks, and the
		// else-branch removal happens outside the importstatus mutex.
		e = importer.odbcqueryrc.ConcurCall(1, func() {
			defer wg.Done()
			e = importer.importRecord(block, line, filename, filetype, int(doingline))
			if e != nil {
				err = merrs.NewError(e, merrs.SSMaps{{"filename": filename}, {"linecount": fmt.Sprint(doingline)}, {"line": line}})
				return
			}
			if logstatus {
				if doingline == doinglines[0] {
					// Oldest in-flight block finished: persist progress.
					importer.importstatus.mutex.Lock()
					importer.importstatus.ImportStatus[filename] = &ImportStatus{
						LinesCount:   doingline,
						RecordsCount: doingblock,
					}
					importer.importstatus.TotalUseTime = time.Since(importer.starttime)
					importer.importstatus.Save()
					doinglines = doinglines[1:]
					// Throttled progress log: at most once every 5 seconds.
					if time.Since(lastlogtime) > 5*time.Second {
						logger.Info("file", filename, "imported", doingblock, "records")
						lastlogtime = time.Now()
					}
					importer.importstatus.mutex.Unlock()
				} else {
					// Not the oldest block: just drop it from the in-flight list.
					for i, l := range doinglines {
						if l == doingline {
							doinglines = append(doinglines[:i], doinglines[i+1:]...)
						}
					}
				}
			}
		})
		if e != nil {
			return linecount, blockcount, merrs.NewError(e, merrs.SSMaps{{"filename": filename}, {"linecount": fmt.Sprint(linecount)}, {"line": line}})
		}
	}
	return
}
  310. func (importer *Importer) importRecord(record map[string]any, line string, filename string, filetype schema.FileType, linecount int) (err error) {
  311. if odbc.LogDebug {
  312. bs, e := json.MarshalIndent(record, "", " ")
  313. if e != nil {
  314. return merrs.NewError(e)
  315. }
  316. logger.Debug(fmt.Sprint("import ", filename, "[", linecount, "]:", string(bs)))
  317. }
  318. var classaliasname string
  319. switch filetype {
  320. case schema.FT_EDGE:
  321. graph.CacheEdgeInfo(record)
  322. default:
  323. classaliasname = string(filetype)
  324. err = importer.odbcimporter.InsertData(classaliasname, record)
  325. if err != nil {
  326. err = merrs.NewError(err, merrs.SSMaps{{"filename": filename}, {"linecount": fmt.Sprint(linecount)}, {"line": line}})
  327. return
  328. }
  329. }
  330. return
  331. }
// alldone forwards the end-of-import notification to the ODBC importer
// (called once after all files have been processed).
func (importer *Importer) alldone() {
	importer.odbcimporter.alldone()
}
// done forwards a per-file completion notification to the ODBC importer
// (deferred by importReader for each imported stream).
func (importer *Importer) done() {
	importer.odbcimporter.done()
}
  338. func Check() {
  339. client := odbc.ODBClient
  340. if client == nil {
  341. return
  342. }
  343. {
  344. // mql := "select id,uniqueid,tags,contain,day,vtime from level1 where uniqueid='E2E:OTR0002L'"
  345. mql := "select * from level1 where uniqueid='E2E:OTR0002L'"
  346. r, e := client.Query(mql).Do()
  347. if e != nil {
  348. panic(merrs.NewError(e))
  349. }
  350. bs, _ := json.MarshalIndent(r.Data, "", " ")
  351. fmt.Println(string(bs))
  352. }
  353. {
  354. mql := `match ("level1:E2E:OTR0002L")-[*]->(),("level1:E2E:OTR0002L")<-[*]-() in "level1","level2"`
  355. r, e := client.Query(mql).Do()
  356. if e != nil {
  357. panic(merrs.NewError(e))
  358. }
  359. bs, _ := json.MarshalIndent(r.Data, "", " ")
  360. fmt.Println(string(bs))
  361. }
  362. }