1 Star 0 Fork 7

dingxiuhao/GTALabel

forked from 连享会/GTALabel 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
GTALabel.ado 13.70 KB
一键复制 编辑 原始数据 按行查看 历史
连享会 提交于 2019-01-06 16:10 . 更新 GTALabel.ado
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537
*-本文件夹下有两个参考程序,可以进一步处理里面的 label value 问题
* 需要添加的选项:
/*
- ok Fulllabel : 提取说明文件中的所有 label 信息;
- ok Save() 把处理好的文件存放到一个 dofile 中,用户自定义路径和文件名;
- ok 屏幕显示打开文件的蓝色链接;
- ok nodisplay : 不在屏幕上显示代码
- ok lower: 变量名转为小写
- labelvalue : 用户设定该选项时,只(only)转换【数字-文字对应表】
- ? both: 同时转换变量标签和数字文字对应表 (容易出错)
- do (onestep) : 一次性搞定 -- 转换完label后直接执行,为变量添加标签;
*/
* Puzzle
* RelationtoComCode [出质方] - 1=控股股东;5=董监高;6=其他;若有,逗号隔开
* label define RelationtoComCode 1 "控股股东" 5 "董监高" 6 "其他" 若有多重关系,用半角逗号隔开"
*-update: 2018/11/14 10:35
* 使用 infix 导入原始 txt 文档
*
*-2018/11/28 15:26
* TRD_Co[DES][xlsx].txt 中的 Label value 是用 逗号 分隔的
*! Author: Yujun Lian (连玉君)
*! version 1.1
*! 2019/1/6 9:39
*------------------------------------------------------------------------------
* GTALabel: read a variable-description text file (one "Varname [label] ..."
*           entry per line) and generate -label var- / -label define- /
*           -label value- commands from it.
*
* Syntax:
*   GTALabel filesource [, DO Fulllabel Lower Save(string) REPLACE Compress
*                          LABelvalue BOTH NODisplay]
*
*   filesource  path of the description file.  When the file name contains
*               no "[", the suffix "[DES][txt].txt" is appended to its base
*               name (the part before the first ".").
*   DO          accepted, but currently has no effect (implementation is
*               commented out below).
*   Fulllabel   append the text that follows "]" to the variable label.
*   Lower       convert variable names to lower case.
*   Save()      write the generated commands to this file.  Only the
*               extensions .txt, .do and .md are accepted; without an
*               extension ".do" is used.
*   REPLACE     allow Save() to overwrite an existing file.
*   Compress    omit the header comment lines and use the compact spacing.
*   LABelvalue  generate value-label commands only.
*   BOTH        generate variable labels and value labels.
*   NODisplay   do not type the generated commands on screen.
*
* Exit codes: 601 if the description file is not found; 198 for conflicting
* options, an unsupported Save() extension, or an existing Save() target
* without REPLACE.
*------------------------------------------------------------------------------
cap program drop GTALabel
program define GTALabel
    version 15
    syntax anything(id="txt file" name=filesource) ///
        [, DO Fulllabel Lower Save(string asis) REPLACE Compress ///
           LABelvalue BOTH NODisplay]

    preserve
    qui {  // qui begin
        clear

        // remove any double quotes from the file specification
        local filesource = ustrregexra(`"`filesource'"',`"""',"")   // stata14 +

        // Split into directory and file name at the LAST "\" or "/".
        // strrpos()/substr() are both byte based, so the split is also
        // correct for paths containing non-ASCII characters.
        local i = max(strrpos(`"`filesource'"',"\"), strrpos(`"`filesource'"',"/"))
        local filepath      = substr(`"`filesource'"',1,`i')
        local filename_full = substr(`"`filesource'"',`=`i'+1',.)

        // base name = everything before the first "."
        tokenize `"`filename_full'"', parse(.)
        local filename_full "`1'"

        if strpos(`"`filename_full'"', "[")>0 {   // filename[DES][txt]: use as typed
            local filesource_temp `"`filesource'"'
        }
        else {                                    // bare filename: add the suffix
            local filename_full `"`filename_full'[DES][txt]"'
            // concatenate in an expression so that a trailing "\" in the
            // directory can never sit directly in front of a macro reference
            local filesource_temp = `"`filepath'"' + `"`filename_full'"' + ".txt"
        }

        // check if file exists (report the file that was actually looked for)
        if !fileexists(`"`filesource_temp'"') {
            di as error `"File `filesource_temp' could not be found"'
            exit 601
        }

        // check specification of -labelvalue- and -both- option
        if "`labelvalue'"!="" & "`both'"!="" {
            dis as error "You can only specify one of -labelvalue- and -both-"
            exit 198
        }

        // -compress- option: spacing used in front of / inside the commands
        if "`compress'"!="" {
            local blank1 ""
            local blank2 " "
        }
        else {
            local blank1 " "
            local blank2 " "
        }

        // drop BOM characters (byte order marks)
        // see
        * https://docs.microsoft.com/en-us/windows/desktop/intl/using-byte-order-marks
        * https://en.wikipedia.org/wiki/Byte_order_mark
        tempfile filesource_noBOM
        filefilter `"`filesource_temp'"' `"`filesource_noBOM'.txt"', from("\EFh\BBh\BFh") to("") replace

        // input data
        import delimited using `"`filesource_noBOM'.txt"', encoding(utf8) clear
        rename v1 v
        replace v = strltrim(v)        // without leading blanks

        // intermediate data sets live in real tempfiles: Stata removes them
        // automatically, also when the program exits early
        tempfile data0 dataLabel dataLabValue
        save "`data0'", replace

        // variable labels
        if ("`labelvalue'" =="") {
            split v, p([ ]) gen(s)     // s1 = name, s2 = text in [...], s3 = rest
            if "`lower'" != "" {
                replace s1 = lower(s1)
            }
            if "`fulllabel'" != "" {   // fulllabel
                // s3 only exists when some line has text after "]"
                cap confirm variable s3
                if !_rc {
                    replace s2 = s2+s3
                }
            }
            replace s2 = "`blank1'label var`blank2'" + s1 + `" ""' + s2 + `"""'
            keep s2
            if "`compress'"=="" {
                insobs 3, before(1)
                replace s2 = "*----------------" in 1
                replace s2 = "*-Label Variables" in 2
                replace s2 = " " in 3
            }
            save "`dataLabel'", replace       // Data 1: variable labels
        }

        // value label
        if ("`labelvalue'"!="")|("`both'"!="") {
            use "`data0'", clear
            keep if regexm(v,"[0-9](=|=)")    // with label value
            keep if regexm(v," - [0-9]")      // with label value

            // check if value-label defined in TXT file
            if _N==0 {
                // -noi- is required: we are inside a -quietly- block
                noi dis in red "Warning: No value-label defined in your txt file, e.g."
                noi dis in red "Varname [xxxx] - 1=第一类;2=第二类;3=第三类"
                noi dis in red "You can delete option -labelvalue- in your command"
                exit    // leave the program (warning only, return code 0)
            }

            replace v = subinstr(v, "-", "", 1)
            split v, p([ ]) gen(s)
            if "`lower'" != "" {
                replace s1 = lower(s1)
            }
            // turn  1=aaa;2=bbb  into  1 "aaa" 2 "bbb"
            replace s3 = ustrregexra(s3,"(=|=)",`" ""',.)
            replace s3 = ustrregexra(s3,"(;|;)",`"" "',.)
            replace s3 = subinstr(s3, "。", "",.)
            replace s3 = "`blank1'label define`blank2'" + s1 + s3 + `"""'

            // duplicate every line: 1st copy = -label define-, 2nd = -label value-
            gen id0= _n
            expand 2
            sort id0
            bysort id0: gen id12 = _n
            replace s3 = "`blank1'label value `blank2'" + s1 + " " + s1 if id12==2
            keep s3
            rename s3 s2
            if "`compress'"=="" {
                insobs 4, before(1)
                replace s2 = " " in 1
                replace s2 = "*-------------" in 2
                replace s2 = "*-Value labels" in 3
                replace s2 = " " in 4
            }
            else {
                insobs 1, before(1)
                replace s2 = " " in 1
            }
            save "`dataLabValue'", replace    // Data 2: value labels
        }

        // Export data
        // Append data1 and data2
        if ("`labelvalue'"=="")&("`both'"=="") {
            use "`dataLabel'", clear
        }
        if ("`labelvalue'"!="") {
            use "`dataLabValue'", clear
        }
        if "`both'"!="" {
            use "`dataLabel'", clear
            append using "`dataLabValue'"
        }
        format s2 %-40s

        // export do file (kept under the temporary directory, not in the cwd)
        tempfile disDOonly
        outfile using `"`disDOonly'.do"', noquote wide replace

        // display in screen
        if "`nodisplay'"=="" {
            noi dis _n
            noi type `"`disDOonly'.do"', asis
        }

        // -do- still has problems; the file probably still contains hidden
        // characters (2019/1/6 15:14)
        /*
        if "`do'" !=""{
            do `"`disDOonly'.do"'
        }
        */

        *-save():
        *- if the user gives an extension it is used, but only .txt, .do, .md
        *- are accepted
        *- if the user gives no extension, a .do file is written
        if `"`save'"'~="" {    // copy from -logout.ado-
            * delete ""
            local save = subinstr(`"`save'"',`"""',"",.)

            * An extension is the text after the LAST dot, and only when that
            * dot belongs to the file name (i.e. follows the last "\" or "/").
            local sep_pos = max(strrpos(`"`save'"',"\"), strrpos(`"`save'"',"/"))
            local dot_pos = strrpos(`"`save'"',".")
            if `dot_pos' > `sep_pos' {
                local suffixname = substr(`"`save'"',`=`dot_pos'+1',.)
                if ~inlist("`suffixname'","txt","do","md") {
                    noi dis in red "Only [ .txt, .do, .md ] files are supported by {opt save()}"
                    exit 198
                }
                local strippedname = substr(`"`save'"',1,`=`dot_pos'-1')
            }
            else {
                * `save' has no extension: export a .do file
                local strippedname `"`save'"'
                local suffixname "do"
            }

            * build the final name first, so the existence check below tests
            * the file that will really be written
            local save = `"`strippedname'"' + "." + "`suffixname'"

            cap confirm file `"`save'"'
            if !_rc & "`replace'"=="" {
                * it exists
                noi di in red `"`save' already exists; specify {opt replace}"'
                exit 198
            }

            // export file
            outfile using `"`save'"', noquote wide replace

            // view file
            if inlist("`suffixname'","txt","md") {
                noi dis `"{stata `" view "`save'" "' : open}"'
            }
            else {
                noi dis `"{stata `" doedit "`save'" "' : open}"'
            }

            // display dir
            local filepath = substr(`"`save'"',1,`sep_pos')
            if `sep_pos'!=0 {
                noi di `"{browse `"`filepath'"': dir}"'
            }
            else {
                noi di `"{browse `"`c(pwd)'"': dir}"'
            }
        }

        // erase the temporary files that carry an added extension
        // (the plain tempfiles are removed by Stata itself)
        cap erase `"`filesource_noBOM'.txt"'
        cap erase `"`disDOonly'.do"'
    }  // qui over
    restore
end
* NOTE: Stata stops reading this file at the top-level -exit- below, so the
*       remaining lines are retained scratch code only; they are not part of
*       the GTALabel program defined above.
exit
*--------------------------------------out of use--------------
* Early draft of the label-generation logic.
* NOTE(review): the fragment is not self-contained: the "}" and "end" further
* down have no matching opening brace / -program define- in this file, and
* some macros are used before they are defined.
local lower "lower" // option lower
local lower ""
local fulllabel ""
local fulllabel "full"
global file "test[DES][txt].txt"
local file "$file"
* read each line of the text file into one strL variable v
infix strL v 1-1000 using `"`file'"', clear
save data0, replace
* split on "[" and "]" into s1, s2, s3, ...
split v, p([ ]) gen(s)
if "`lower'" != ""{
replace s1 = lower(s1)
}
if "`fulllabel'" != ""{ // fulllabel
replace s2 = s2+s3
}
replace s2 = " label var " + s1 + `" ""' + s2 + `"""'
*-value label
* NOTE(review): `nolabelvalue' and `data0' are not defined in this fragment
if "`nolabelvalue'" == ""{
use "`data0'", clear
use data0, clear
keep if regexm(v,"[0-9](=|=)") // with label value
replace v = subinstr(v, "-", "", 1)
split v, p([ ]) gen(s)
if "`lower'" != ""{
replace s1 = lower(s1)
}
replace s3 = ustrregexra(s3,"(=|=)",`" ""',.)
replace s3 = ustrregexra(s3,"(;|;)",`" ""',.)
replace s3 = subinstr(s3, "。", "",.)
replace s3 = " label define " + s1 + s3 + `"""'
* duplicate each observation; the second copy becomes the -label value- line
gen id0= _n
expand 2
sort id0
bysort id0: gen id12 = _n
replace s3 = " label value " + s1 + " " + s1 if id12==2
list s3, clean noobs compress
tempname dataLabValue
save "`dataLabValue'.dta", replace //to be appended
}
}
*-export txt file
format s3 %-40s
* `ff' = the part of the file name before the first "["
if strpos("`file'", "[")>0{
*local ff = subinstr("`file'", "[", "_", .)
*local ff = subinstr("`ff'", "]", "_", .)
tokenize "`file'", parse([)
local ff "`1'"
}
else{
local ff "`file'"
}
local vorder "a1a s1a a2 s2"
* NOTE(review): `x' is only defined further down, so it is empty here
order `x'
qui outfile `vorder' using "`ff'_label_temp.raw", noquote replace wide
filefilter `ff'_label_temp.raw `ff'_label.raw, ///
from(" ") to(" ") replace
erase `ff'_label_temp.raw
cap erase `ff'_label.do
shell rename `ff'_label.raw `ff'_label.do
doedit `ff'_label.do
*-list the results on the screen
local x "a1 v1 s1 a2 s2"
order `x'
list `x', clean noobs noheader //list the results
dis _n
dis _n " *-------------------------------"
dis " *-定义-`file'-文件的变量标签"
dis " do `ff'_label.do //定义变量标签的命令"
*}
* write the value-label lines (s3) to <ff>_labelvalue.do
local x "s3"
qui outfile `x' using "`ff'_labelvalue_temp.raw", noquote replace wide
filefilter `ff'_labelvalue_temp.raw `ff'_labelvalue.raw, ///
from("^^") to("\n") replace
erase `ff'_labelvalue_temp.raw
cap erase `ff'_label.do
shell rename `ff'_labelvalue.raw `ff'_labelvalue.do
doedit `ff'_labelvalue.do
list s3, clean noobs noheader
*restore
end
*-Temporary file
* alternative draft for building the -label var- lines
if "`fulllabel'" != ""{ // fulllabel
split v, p("[") gen(s)
replace s2 = `"""'+"["+s2+`"""'
gen s0=" label var "
drop s
order s0 s1 s2
gen s = s0+s1+" "+s2
}
else{
split v, p([ ]) gen(s)
gen v1 = " label var"
if "`lower'" != ""{
replace s1 = lower(s1)
}
gen s1a = v1 + " " + s1
replace s2 = `"""' + s2 + `"""'
}
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/dingxiuhao/GTALabel.git
git@gitee.com:dingxiuhao/GTALabel.git
dingxiuhao
GTALabel
GTALabel
master

搜索帮助