diff --git a/tools/examples/xmexample.hvm b/tools/examples/xmexample.hvm
index 96b6cc4..19358a4 100644
--- a/tools/examples/xmexample.hvm
+++ b/tools/examples/xmexample.hvm
@@ -350,6 +350,25 @@ tsc_mode=0
# like pci=['xx:xx.x'], it enables graphics passthrough, default=0 (disabled)
#gfx_passthru=0
+# Enable virtual graphics. Default is disabled.
+#vgt=0
+
+# The low graphics memory size (CPU-visible), in MB. Default is 64.
+#vgt_low_gm_sz=64
+
+# The high graphics memory size (CPU-invisible), in MB. Default is 448.
+#vgt_high_gm_sz=448
+
+# The number of fence registers. Default is 4.
+#vgt_fence_sz=4
+
+# Boolean (0|1): whether the vgt device should be the primary VGA.
+# The default is 0. For a Windows vgt guest, we usually need to set it to 1
+# after the gfx driver is installed.
+# If the user doesn't set this variable explicitly, the global setting of the
+# value in the vgt driver is used.
+#vgt_primary=0
+#
#-----------------------------------------------------------------------------
# Configure PVSCSI devices:
#
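For reference, a guest configuration that enables vGT using the defaults documented above might look like the following; the values are illustrative only:

    vgt=1
    vgt_low_gm_sz=64      # CPU-visible graphics memory, in MB
    vgt_high_gm_sz=448    # CPU-invisible graphics memory, in MB
    vgt_fence_sz=4        # number of fence registers
    vgt_primary=1         # make the vgt device the primary VGA, e.g. for a
                          # Windows guest after the gfx driver is installed
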
diff --git a/tools/firmware/Makefile b/tools/firmware/Makefile
index aff8e56..2eb9e60 100644
--- a/tools/firmware/Makefile
+++ b/tools/firmware/Makefile
@@ -19,6 +19,7 @@ ovmf:
seabios-dir:
GIT=$(GIT) $(XEN_ROOT)/scripts/git-checkout.sh $(SEABIOS_UPSTREAM_URL) $(SEABIOS_UPSTREAM_TAG) seabios-dir
+ patch -p1 < $(XEN_ROOT)/tools/firmware/xengt_seabios.patch
cp seabios-config seabios-dir/.config;
.PHONY: all
diff --git a/tools/firmware/hvmloader/Makefile b/tools/firmware/hvmloader/Makefile
index c6e7376..47365db 100644
--- a/tools/firmware/hvmloader/Makefile
+++ b/tools/firmware/hvmloader/Makefile
@@ -111,7 +111,12 @@ ifneq ($(STDVGA_ROM),)
echo "#ifdef ROM_INCLUDE_VGABIOS" >> $@.new
sh ./mkhex vgabios_stdvga $(STDVGA_ROM) >> $@.new
echo "#endif" >> $@.new
+
+ echo "#ifdef SEABIOS_INCLUDE_VGABIOS" >> $@.new
+ sh ./mkhex seabios_vgabios_stdvga $(STDVGA_ROM) >> $@.new
+ echo "#endif" >> $@.new
endif
+
ifneq ($(CIRRUSVGA_ROM),)
echo "#ifdef ROM_INCLUDE_VGABIOS" >> $@.new
sh ./mkhex vgabios_cirrusvga $(CIRRUSVGA_ROM) >> $@.new
diff --git a/tools/firmware/hvmloader/config.h b/tools/firmware/hvmloader/config.h
index 6641197..09ab9a9 100644
--- a/tools/firmware/hvmloader/config.h
+++ b/tools/firmware/hvmloader/config.h
@@ -53,7 +53,7 @@ extern struct bios_config ovmf_config;
#define PCI_ISA_IRQ_MASK 0x0c20U /* ISA IRQs 5,10,11 are PCI connected */
/* MMIO hole: Hardcoded defaults, which can be dynamically expanded. */
-#define PCI_MEM_START 0xf0000000
+#define PCI_MEM_START 0xc0000000
#define PCI_MEM_END 0xfc000000
extern unsigned long pci_mem_start, pci_mem_end;
diff --git a/tools/firmware/hvmloader/pci.c b/tools/firmware/hvmloader/pci.c
index 627e8cb..804565f 100644
--- a/tools/firmware/hvmloader/pci.c
+++ b/tools/firmware/hvmloader/pci.c
@@ -24,6 +24,7 @@
#include "hypercall.h"
#include "config.h"
#include "pci_regs.h"
+#include "vgt.h"
#include <xen/memory.h>
#include <xen/hvm/ioreq.h>
@@ -36,6 +37,9 @@ unsigned long pci_mem_end = PCI_MEM_END;
enum virtual_vga virtual_vga = VGA_none;
unsigned long igd_opregion_pgbase = 0;
+#define VESA_MMIO_RSVD_START 0xe0000000UL
+#define VESA_MMIO_RSVD_END 0xe0130000UL
+
void pci_setup(void)
{
uint8_t is_64bar, using_64bar, bar64_relocate = 0;
@@ -111,35 +115,31 @@ void pci_setup(void)
ASSERT((devfn != PCI_ISA_DEVFN) ||
((vendor_id == 0x8086) && (device_id == 0x7000)));
+ printf("Detect %x.%x devfn, with class: %x\n", devfn>>3, devfn&7, class);
switch ( class )
{
case 0x0300:
/* If emulated VGA is found, preserve it as primary VGA. */
if ( (vendor_id == 0x1234) && (device_id == 0x1111) )
{
+ printf("Detect emulated stdvga\n");
vga_devfn = devfn;
virtual_vga = VGA_std;
}
else if ( (vendor_id == 0x1013) && (device_id == 0xb8) )
{
+ printf("Detect emulated cirrus vga\n");
vga_devfn = devfn;
virtual_vga = VGA_cirrus;
}
- else if ( virtual_vga == VGA_none )
+ else if ( (vendor_id == 0x8086) &&
+ (_is_sandybridge(device_id) || _is_ivybridge(device_id)
+ || _is_haswell(device_id) || _is_broadwell(device_id) )
+ )
{
+ printf("Detect Intel Graphics Device\n");
vga_devfn = devfn;
virtual_vga = VGA_pt;
- if ( vendor_id == 0x8086 )
- {
- igd_opregion_pgbase = mem_hole_alloc(IGD_OPREGION_PAGES);
- /*
- * Write the the OpRegion offset to give the opregion
- * address to the device model. The device model will trap
- * and map the OpRegion at the give address.
- */
- pci_writel(vga_devfn, PCI_INTEL_OPREGION,
- igd_opregion_pgbase << PAGE_SHIFT);
- }
}
break;
case 0x0680:
@@ -272,6 +272,13 @@ void pci_setup(void)
unsigned int,
hvm_info->low_mem_pgend - (pci_mem_start >> PAGE_SHIFT),
(1u << 16) - 1);
+
+ /* This is a temporary check; it will be removed once upstream Xen
+ * fixes the issue "QEMU hardcodes the PCI MMIO base".
+ */
+ printf("Cannot find a big enough MMIO hole!\n");
+ BUG();
+
if ( hvm_info->high_mem_pgend == 0 )
hvm_info->high_mem_pgend = 1ull << (32 - PAGE_SHIFT);
hvm_info->low_mem_pgend -= nr_pages;
@@ -366,6 +373,15 @@ void pci_setup(void)
base = (resource->base + bar_sz - 1) & ~(uint64_t)(bar_sz - 1);
bar_data |= (uint32_t)base;
bar_data_upper = (uint32_t)(base >> 32);
+
+ /* Skip allocating the range reserved by vesafb */
+ if (resource == &mem_resource &&
+ (base + bar_sz > VESA_MMIO_RSVD_START) && (base < VESA_MMIO_RSVD_END)) {
+ resource->base = VESA_MMIO_RSVD_END;
+ base = (resource->base + bar_sz - 1) & ~(uint64_t)(bar_sz - 1);
+ bar_data |= (uint32_t)base;
+ }
+
base += bar_sz;
if ( (base < resource->base) || (base > resource->max) )
@@ -400,6 +416,47 @@ void pci_setup(void)
if ( vga_devfn != 256 )
{
+ if (virtual_vga == VGA_pt) {
+ uint32_t bar = pci_readl(vga_devfn, PCI_BASE_ADDRESS_0)
+ & PCI_BASE_ADDRESS_MEM_MASK;
+
+ void *pvinfo = (void *)bar + VGT_PVINFO_PAGE;
+ uint64_t *magic = pvinfo;
+
+ if (*magic == VGT_MAGIC) {
+ /*
+ * Found a VGT device; use the standard VGA BIOS.
+ */
+ printf("Found VGT\n");
+ virtual_vga = VGA_std;
+
+ /* XXX: we use this hack to tell the vGT driver the
+ * top of <4G memory, so vGT can avoid unnecessary
+ * attempts to map the memory hole. This optimization
+ * can speed up guest boot time and improve Win7
+ * SMP guest stability.
+ * NOTE: here we're actually trying to write 32 bits
+ * into VENDOR_ID and DEVICE_ID -- we assume normally
+ * sane code in the guest won't do this...
+ */
+ pci_writel(vga_devfn, PCI_VENDOR_ID, hvm_info->low_mem_pgend);
+ } else {
+ /*
+ * Found a VT-d device; use the physical VGA BIOS.
+ */
+ printf("Found VTD\n");
+ }
+
+ igd_opregion_pgbase = mem_hole_alloc(2);
+ /*
+ * Write the OpRegion offset to give the OpRegion
+ * address to the device model. The device model will trap
+ * and map the OpRegion at the given address.
+ */
+ pci_writel(vga_devfn, PCI_INTEL_OPREGION,
+ igd_opregion_pgbase << PAGE_SHIFT);
+ }
+
/*
* VGA registers live in I/O space so ensure that primary VGA
* has IO enabled, even if there is no I/O BAR on that
diff --git a/tools/firmware/hvmloader/seabios.c b/tools/firmware/hvmloader/seabios.c
index dd7dfbe..6938ef8 100644
--- a/tools/firmware/hvmloader/seabios.c
+++ b/tools/firmware/hvmloader/seabios.c
@@ -29,6 +29,7 @@
#include "acpi/acpi2_0.h"
#define ROM_INCLUDE_SEABIOS
+#define SEABIOS_INCLUDE_VGABIOS
#include "roms.inc"
extern unsigned char dsdt_anycpu_qemu_xen[];
@@ -133,6 +134,20 @@ static void seabios_setup_e820(void)
dump_e820_table(e820, info->e820_nr);
}
+//BUILD_BUG_ON(sizeof(seabios) > (0x00100000U - SEABIOS_PHYSICAL_ADDRESS));
+
+#ifdef SEABIOS_INCLUDE_VGABIOS
+static void seabios_load_roms(void)
+{
+ if (virtual_vga != VGA_std)
+ return;
+
+ printf("Loading Standard VGABIOS ...\n");
+ memcpy((void *)VGABIOS_PHYSICAL_ADDRESS,
+ seabios_vgabios_stdvga, sizeof(seabios_vgabios_stdvga));
+}
+#endif
+
struct bios_config seabios_config = {
.name = "SeaBIOS",
@@ -141,7 +156,11 @@ struct bios_config seabios_config = {
.bios_address = 0x100000 - sizeof(seabios),
+#ifdef SEABIOS_INCLUDE_VGABIOS
+ .load_roms = seabios_load_roms,
+#else
.load_roms = NULL,
+#endif
.bios_load = NULL,
diff --git a/tools/firmware/hvmloader/vgt.h b/tools/firmware/hvmloader/vgt.h
new file mode 100644
index 0000000..49d3d60
--- /dev/null
+++ b/tools/firmware/hvmloader/vgt.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2012-2013, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef _VGT_DEVTABLE_H
+#define _VGT_DEVTABLE_H
+
+#define VGT_PVINFO_PAGE 0x78000
+#define VGT_MAGIC 0x4776544776544776 /* 'vGTvGTvG' */
+#define VGT_VERSION_MAJOR 1
+#define VGT_VERSION_MINOR 0
+
+static inline int _is_sandybridge(int devid)
+{
+ int ret = 0;
+
+ switch (devid) {
+ case 0x0102:
+ case 0x0112:
+ case 0x0122:
+ case 0x0106:
+ case 0x0116:
+ case 0x0126:
+ case 0x010A:
+ ret = 1;
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+static inline int _is_ivybridge(int devid)
+{
+ int ret = 0;
+
+ switch (devid) {
+ case 0x0156:
+ case 0x0166:
+ case 0x0152:
+ case 0x0162:
+ case 0x015a:
+ case 0x016a:
+ ret = 1;
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+static inline int _is_haswell(int devid)
+{
+ int ret = 0;
+
+ switch (devid) {
+ case 0x0400:
+ case 0x0402:
+ case 0x0404:
+ case 0x0406:
+ case 0x0408:
+ case 0x040a:
+ case 0x0412:
+ case 0x0416:
+ case 0x041a:
+ case 0x0422:
+ case 0x0426:
+ case 0x042a:
+ case 0x0a02:
+ case 0x0a06:
+ case 0x0a0a:
+ case 0x0a12:
+ case 0x0a16:
+ case 0x0a1a:
+ case 0x0a22:
+ case 0x0a26:
+ case 0x0a2a:
+ case 0x0c02:
+ case 0x0c04:
+ case 0x0c06:
+ case 0x0c0a:
+ case 0x0c12:
+ case 0x0c16:
+ case 0x0c1a:
+ case 0x0c22:
+ case 0x0c26:
+ case 0x0c2a:
+ case 0x0d12:
+ case 0x0d16:
+ case 0x0d1a:
+ case 0x0d22:
+ case 0x0d26:
+ case 0x0d2a:
+ case 0x0d32:
+ case 0x0d36:
+ case 0x0d3a:
+ ret = 1;
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+static inline int _is_broadwell(int devid)
+{
+ switch ((devid >> 4) & 0xf) {
+ case 0:
+ case 1:
+ case 2:
+ break;
+ default:
+ return 0;
+ }
+
+ devid &= ~0xf0;
+
+ switch (devid) {
+ case 0x1602:
+ case 0x1606:
+ case 0x160B:
+ case 0x160E:
+ case 0x160A:
+ case 0x160D:
+ break;
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
+#endif /* _VGT_DEVTABLE_H */
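A side note on VGT_MAGIC above: the constant spells out its own signature when read as bytes on a little-endian machine (which hvmloader always is). A self-contained check, for illustration only:

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    int main(void)
    {
        uint64_t magic = 0x4776544776544776ULL;  /* VGT_MAGIC */
        char s[9] = { 0 };

        memcpy(s, &magic, sizeof(magic));  /* little-endian byte order */
        printf("%s\n", s);                 /* prints "vGTvGTvG" */
        return 0;
    }
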
diff --git a/tools/firmware/vgabios/Makefile b/tools/firmware/vgabios/Makefile
index 26bb871..6ff921b 100644
--- a/tools/firmware/vgabios/Makefile
+++ b/tools/firmware/vgabios/Makefile
@@ -37,7 +37,7 @@ release:
tar czvf ../$(RELEASE).tgz --exclude CVS -C .. $(RELEASE)/
vgabios.bin: biossums vgabios.c vgabios.h vgafonts.h vgatables.h vbe.h vbe.c vbetables.h
- $(GCC) -E -P vgabios.c $(VGABIOS_VERS) -DVBE $(VGABIOS_DATE) > _vgabios_.c
+ $(GCC) -E -P vgabios.c $(VGABIOS_VERS) -DVBE -DVGT $(VGABIOS_DATE) > _vgabios_.c
$(BCC) -o vgabios.s -C-c -D__i86__ -S -0 _vgabios_.c
sed -e 's/^\.text//' -e 's/^\.data//' vgabios.s > _vgabios_.s
$(AS86) _vgabios_.s -b vgabios.bin -u -w- -g -0 -j -O -l vgabios.txt
@@ -47,7 +47,7 @@ vgabios.bin: biossums vgabios.c vgabios.h vgafonts.h vgatables.h vbe.h vbe.c vbe
ls -l VGABIOS-lgpl-latest.bin
vgabios.debug.bin: biossums vgabios.c vgabios.h vgafonts.h vgatables.h vbe.h vbe.c vbetables.h
- $(GCC) -E -P vgabios.c $(VGABIOS_VERS) -DVBE -DDEBUG $(VGABIOS_DATE) > _vgabios-debug_.c
+ $(GCC) -E -P vgabios.c $(VGABIOS_VERS) -DVBE -DVGT -DDEBUG $(VGABIOS_DATE) > _vgabios-debug_.c
$(BCC) -o vgabios-debug.s -C-c -D__i86__ -S -0 _vgabios-debug_.c
sed -e 's/^\.text//' -e 's/^\.data//' vgabios-debug.s > _vgabios-debug_.s
$(AS86) _vgabios-debug_.s -b vgabios.debug.bin -u -w- -g -0 -j -O -l vgabios.debug.txt
diff --git a/tools/firmware/vgabios/vgabios.c b/tools/firmware/vgabios/vgabios.c
index a9dbe00..c81d7cd 100644
--- a/tools/firmware/vgabios/vgabios.c
+++ b/tools/firmware/vgabios/vgabios.c
@@ -149,6 +149,12 @@ vgabios_entry_point:
jmp vgabios_init_func
+/*
+ * The GEN Windows driver assumes the identifying string
+ * is located at VBIOS offset 6.
+ * Remove the following bytes to make Windows happy for VGT.
+ */
+#ifndef VGT
#ifdef PCIBIOS
.org 0x18
.word vgabios_pci_data
@@ -158,6 +164,7 @@ vgabios_entry_point:
.org 0x1e
.ascii "IBM"
.byte 0x00
+#endif /* VGT */
vgabios_name:
.ascii "Plex86/Bochs VGABios"
diff --git a/tools/firmware/xengt_seabios.patch b/tools/firmware/xengt_seabios.patch
new file mode 100644
index 0000000..d917519
--- /dev/null
+++ b/tools/firmware/xengt_seabios.patch
@@ -0,0 +1,12 @@
+--- a/seabios-dir/src/optionroms.c
++++ b/seabios-dir/src/optionroms.c
+@@ -478,7 +478,8 @@ vga_setup(void)
+ S3ResumeVgaInit = romfile_loadint("etc/s3-resume-vga-init", 0);
+ ScreenAndDebug = romfile_loadint("etc/screen-and-debug", 1);
+
+- if (CONFIG_OPTIONROMS_DEPLOYED) {
++ if (CONFIG_OPTIONROMS_DEPLOYED ||
++ ((*(u16 *)BUILD_ROM_START) == 0xaa55)) {
+ // Option roms are already deployed on the system.
+ init_optionrom((void*)BUILD_ROM_START, 0, 1);
+ } else {
diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index 826236f..0530303 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -1285,6 +1285,29 @@ static void domain_destroy_callback(libxl__egc *egc,
static void destroy_finish_check(libxl__egc *egc,
libxl__domain_destroy_state *dds);
+// We don't care about the return value:
+// 1) the guest may not be a VGT guest;
+// 2) normally, when a VGT guest shuts down, the ioemu has already tried to
+// destroy the vgt instance and we shouldn't get here via "xl destroy dom_id";
+// 3) we get here because the ioemu didn't destroy the vgt instance
+// successfully (e.g., ioemu exited abnormally), or we want to kill the
+// guest by force while it's running. In this case, we still try our best
+// to destroy the vgt instance.
+static void destroy_vgt_instance(int domid)
+{
+ const char *path = "/sys/kernel/vgt/control/create_vgt_instance";
+ FILE *vgt_file;
+
+ if (domid <= 0)
+ return;
+
+ if ((vgt_file = fopen(path, "w")) == NULL)
+ return;
+
+ (void)fprintf(vgt_file, "%d\n", -domid);
+ (void)fclose(vgt_file);
+}
+
void libxl__domain_destroy(libxl__egc *egc, libxl__domain_destroy_state *dds)
{
STATE_AO_GC(dds->ao);
@@ -1467,6 +1490,8 @@ static void devices_destroy_cb(libxl__egc *egc,
libxl__userdata_destroyall(gc, domid);
+ destroy_vgt_instance(domid);
+
rc = xc_domain_destroy(ctx->xch, domid);
if (rc < 0) {
LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "xc_domain_destroy failed for %d", domid);
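The sysfs protocol used by destroy_vgt_instance() above (write the negated domid to the control node) can also be exercised outside libxl; a minimal standalone sketch, with only the path and sign convention taken from the code above:

    #include <stdio.h>

    /* Ask the dom0 vGT driver to tear down the instance of a given domain.
     * Writing a negative domid to create_vgt_instance requests destruction,
     * mirroring destroy_vgt_instance() in libxl.c above. */
    static int vgt_destroy_instance(int domid)
    {
        const char *path = "/sys/kernel/vgt/control/create_vgt_instance";
        FILE *f;

        if (domid <= 0)
            return -1;
        if ((f = fopen(path, "w")) == NULL)
            return -1;              /* not a vGT host, or insufficient rights */
        fprintf(f, "%d\n", -domid);
        fclose(f);
        return 0;
    }
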
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index 0c32d0b..bf29699 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -275,7 +275,7 @@ int libxl__domain_build_info_setdefault(libxl__gc *gc,
}
libxl_defbool_setdefault(&b_info->u.hvm.nographic, false);
-
+ libxl_defbool_setdefault(&b_info->u.hvm.vgt, false);
libxl_defbool_setdefault(&b_info->u.hvm.gfx_passthru, false);
break;
diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c
index 7e54c02..ded1bf9 100644
--- a/tools/libxl/libxl_dm.c
+++ b/tools/libxl/libxl_dm.c
@@ -185,12 +185,36 @@ static char ** libxl__build_device_model_args_old(libxl__gc *gc,
NULL);
}
- switch (b_info->u.hvm.vga.kind) {
- case LIBXL_VGA_INTERFACE_TYPE_STD:
- flexarray_append(dm_args, "-std-vga");
- break;
- case LIBXL_VGA_INTERFACE_TYPE_CIRRUS:
- break;
+ if (libxl_defbool_val(b_info->u.hvm.vgt)) {
+ flexarray_vappend(dm_args, "-vga", "xengt", NULL);
+ if (b_info->u.hvm.vgt_low_gm_sz) {
+ flexarray_vappend(dm_args, "-vgt_low_gm_sz",
+ libxl__sprintf(gc, "%d", b_info->u.hvm.vgt_low_gm_sz), NULL);
+ }
+ if (b_info->u.hvm.vgt_high_gm_sz) {
+ flexarray_vappend(dm_args, "-vgt_high_gm_sz",
+ libxl__sprintf(gc, "%d", b_info->u.hvm.vgt_high_gm_sz), NULL);
+ }
+ if (b_info->u.hvm.vgt_fence_sz) {
+ flexarray_vappend(dm_args, "-vgt_fence_sz",
+ libxl__sprintf(gc, "%d", b_info->u.hvm.vgt_fence_sz), NULL);
+ }
+ if (b_info->u.hvm.vgt_primary != -1) {
+ flexarray_vappend(dm_args, "-vgt_primary",
+ libxl__sprintf(gc, "%d", b_info->u.hvm.vgt_primary), NULL);
+ }
+ if (b_info->u.hvm.vgt_monitor_config_file) {
+ flexarray_vappend(dm_args, "-vgt_monitor_config_file",
+ libxl__sprintf(gc, "%s", b_info->u.hvm.vgt_monitor_config_file), NULL);
+ }
+ } else {
+ switch (b_info->u.hvm.vga.kind) {
+ case LIBXL_VGA_INTERFACE_TYPE_STD:
+ flexarray_append(dm_args, "-std-vga");
+ break;
+ case LIBXL_VGA_INTERFACE_TYPE_CIRRUS:
+ break;
+ }
}
if (b_info->u.hvm.boot) {
@@ -465,19 +489,44 @@ static char ** libxl__build_device_model_args_new(libxl__gc *gc,
flexarray_append(dm_args, spiceoptions);
}
- switch (b_info->u.hvm.vga.kind) {
- case LIBXL_VGA_INTERFACE_TYPE_STD:
- flexarray_vappend(dm_args, "-vga", "std", NULL);
- break;
- case LIBXL_VGA_INTERFACE_TYPE_CIRRUS:
- flexarray_vappend(dm_args, "-vga", "cirrus", NULL);
- if (b_info->video_memkb) {
- flexarray_vappend(dm_args, "-global",
- GCSPRINTF("vga.vram_size_mb=%d",
- libxl__sizekb_to_mb(b_info->video_memkb)), NULL);
+ /* TODO: some vga options are exclusive */
+ if (libxl_defbool_val(b_info->u.hvm.vgt)) {
+ flexarray_vappend(dm_args, "-vga", "xengt", NULL);
+ if (b_info->u.hvm.vgt_low_gm_sz) {
+ flexarray_vappend(dm_args, "-vgt_low_gm_sz",
+ libxl__sprintf(gc, "%d", b_info->u.hvm.vgt_low_gm_sz), NULL);
+ }
+ if (b_info->u.hvm.vgt_high_gm_sz) {
+ flexarray_vappend(dm_args, "-vgt_high_gm_sz",
+ libxl__sprintf(gc, "%d", b_info->u.hvm.vgt_high_gm_sz), NULL);
+ }
+ if (b_info->u.hvm.vgt_fence_sz) {
+ flexarray_vappend(dm_args, "-vgt_fence_sz",
+ libxl__sprintf(gc, "%d", b_info->u.hvm.vgt_fence_sz), NULL);
+ }
+ if (b_info->u.hvm.vgt_primary != -1) {
+ flexarray_vappend(dm_args, "-vgt_primary",
+ libxl__sprintf(gc, "%d", b_info->u.hvm.vgt_primary), NULL);
}
- break;
- }
+ if (b_info->u.hvm.vgt_monitor_config_file) {
+ flexarray_vappend(dm_args, "-vgt_monitor_config_file",
+ libxl__sprintf(gc, "%s", b_info->u.hvm.vgt_monitor_config_file), NULL);
+ }
+ } else {
+ switch (b_info->u.hvm.vga.kind) {
+ case LIBXL_VGA_INTERFACE_TYPE_STD:
+ flexarray_vappend(dm_args, "-vga", "std", NULL);
+ break;
+ case LIBXL_VGA_INTERFACE_TYPE_CIRRUS:
+ flexarray_vappend(dm_args, "-vga", "cirrus", NULL);
+ if (b_info->video_memkb) {
+ flexarray_vappend(dm_args, "-global",
+ GCSPRINTF("vga.vram_size_mb=%d",
+ libxl__sizekb_to_mb(b_info->video_memkb)), NULL);
+ }
+ break;
+ }
+ }
if (b_info->u.hvm.boot) {
flexarray_vappend(dm_args, "-boot",
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index d218a2d..6af715c 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -23,6 +23,7 @@ libxl_hwcap = Builtin("hwcap", passby=PASS_BY_REFERENCE)
#
MemKB = UInt(64, init_val = "LIBXL_MEMKB_DEFAULT")
+VgtInt = UInt(32, init_val = "0UL")
#
# Constants / Enumerations
@@ -314,12 +315,18 @@ libxl_domain_build_info = Struct("domain_build_info",[
("acpi_firmware", string),
("nographic", libxl_defbool),
("vga", libxl_vga_interface_info),
+ ("vgt", libxl_defbool),
+ ("vgt_low_gm_sz", VgtInt),
+ ("vgt_high_gm_sz", VgtInt),
+ ("vgt_fence_sz", VgtInt),
+ ("vgt_primary", VgtInt),
+ ("vgt_monitor_config_file", string),
("vnc", libxl_vnc_info),
# keyboard layout, default is en-us keyboard
("keymap", string),
("sdl", libxl_sdl_info),
("spice", libxl_spice_info),
-
+
("gfx_passthru", libxl_defbool),
("serial", string),
diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
index f5943a4..7885671 100644
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -1464,6 +1464,18 @@ skip_vfb:
b_info->u.hvm.vga.kind = l ? LIBXL_VGA_INTERFACE_TYPE_STD :
LIBXL_VGA_INTERFACE_TYPE_CIRRUS;
+ xlu_cfg_get_defbool(config, "vgt", &b_info->u.hvm.vgt, 0);
+ if (!xlu_cfg_get_long(config, "vgt_low_gm_sz", &l, 0))
+ b_info->u.hvm.vgt_low_gm_sz = l;
+ if (!xlu_cfg_get_long(config, "vgt_high_gm_sz", &l, 0))
+ b_info->u.hvm.vgt_high_gm_sz = l;
+ if (!xlu_cfg_get_long(config, "vgt_fence_sz", &l, 0))
+ b_info->u.hvm.vgt_fence_sz = l;
+ if (!xlu_cfg_get_long(config, "vgt_primary", &l, 0))
+ b_info->u.hvm.vgt_primary = l;
+ else
+ b_info->u.hvm.vgt_primary = -1; /* not specified */
+ xlu_cfg_replace_string (config, "vgt_monitor_config_file", &b_info->u.hvm.vgt_monitor_config_file, 0);
xlu_cfg_get_defbool(config, "vnc", &b_info->u.hvm.vnc.enable, 0);
xlu_cfg_replace_string (config, "vnclisten",
&b_info->u.hvm.vnc.listen, 0);
diff --git a/tools/libxl/xl_sxp.c b/tools/libxl/xl_sxp.c
index a16a025..345c6d0 100644
--- a/tools/libxl/xl_sxp.c
+++ b/tools/libxl/xl_sxp.c
@@ -113,6 +113,8 @@ void printf_info_sexp(int domid, libxl_domain_config *d_config)
printf("\t\t\t(stdvga %s)\n", b_info->u.hvm.vga.kind ==
LIBXL_VGA_INTERFACE_TYPE_STD ?
"True" : "False");
+ printf("\t\t\t(vgt %s)\n",
+ libxl_defbool_to_string(b_info->u.hvm.vgt));
printf("\t\t\t(vnc %s)\n",
libxl_defbool_to_string(b_info->u.hvm.vnc.enable));
printf("\t\t\t(vnclisten %s)\n", b_info->u.hvm.vnc.listen);
diff --git a/tools/tests/Makefile b/tools/tests/Makefile
index adeb120..f94bbde 100644
--- a/tools/tests/Makefile
+++ b/tools/tests/Makefile
@@ -5,7 +5,7 @@ CFLAGS += $(CFLAGS_libxenctrl)
LDLIBS += $(LDLIBS_libxenctrl)
SUBDIRS-y :=
-SUBDIRS-$(CONFIG_X86) += mce-test
+#SUBDIRS-$(CONFIG_X86) += mce-test
SUBDIRS-y += mem-sharing
ifeq ($(XEN_TARGET_ARCH),__fixme__)
SUBDIRS-y += regression
diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile
index d502bdf..e8a3c4b 100644
--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -58,6 +58,7 @@ obj-y += crash.o
obj-y += tboot.o
obj-y += hpet.o
obj-y += xstate.o
+obj-y += vgt.o
obj-$(crash_debug) += gdbstub.o
diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c
index f41f0de..d36b93f 100644
--- a/xen/arch/x86/acpi/power.c
+++ b/xen/arch/x86/acpi/power.c
@@ -30,6 +30,7 @@
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <acpi/cpufreq/cpufreq.h>
+#include <asm/vgt.h>
uint32_t system_reset_counter = 1;
@@ -73,6 +74,8 @@ static void device_power_up(void)
time_resume();
console_resume();
+
+ vgt_resume();
}
static void freeze_domains(void)
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 146fb9f..5b6f8fb 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -60,6 +60,7 @@
#include <xen/numa.h>
#include <xen/iommu.h>
#include <compat/vcpu.h>
+#include <asm/vgt.h>
DEFINE_PER_CPU(struct vcpu *, curr_vcpu);
DEFINE_PER_CPU(unsigned long, cr4);
@@ -469,6 +470,8 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags)
(domcr_flags & DOMCRF_hap);
d->arch.hvm_domain.mem_sharing_enabled = 0;
+ d->arch.hvm_domain.vgt_enabled = 0;
+
d->arch.s3_integrity = !!(domcr_flags & DOMCRF_s3_integrity);
INIT_LIST_HEAD(&d->arch.pdev_list);
@@ -532,6 +535,8 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags)
d->arch.ioport_caps =
rangeset_new(d, "I/O Ports", RANGESETF_prettyprint_hex);
+ d->arch.ioport_forwarding_caps =
+ rangeset_new(d, "I/O Ports Self-forwarding", RANGESETF_prettyprint_hex);
rc = -ENOMEM;
if ( d->arch.ioport_caps == NULL )
goto fail;
@@ -1012,6 +1017,48 @@ arch_do_vcpu_op(
break;
}
+ case VCPUOP_request_io_emulation:
+ {
+ struct vcpu_emul_ioreq ioreq;
+
+ rc = -EFAULT;
+ if (dom0 != v->domain) /* only for dom0 */
+ break;
+ if ( copy_from_guest(&ioreq, arg, 1) )
+ break;
+
+ rc = -EINVAL;
+ if ( !hypercall_io_emulation(v, &ioreq) )
+ break;
+
+ rc = -EFAULT;
+ if ( copy_to_guest(arg, &ioreq, 1) )
+ break;
+ rc = 0;
+ break;
+ }
+
+ case VCPUOP_get_sysdata:
+ {
+ struct vcpu_sysdata_request req;
+
+ rc = -EFAULT;
+ if (dom0 != v->domain) /* only for dom0 */
+ break;
+ if ( copy_from_guest(&req, arg, 1) )
+ break;
+
+ rc = -EINVAL;
+ if ( !hypercall_get_sysdata(v, &req) )
+ break;
+
+ rc = -EFAULT;
+ if ( copy_to_guest(arg, &req, 1) )
+ break;
+ rc = 0;
+ break;
+ }
+
default:
rc = -ENOSYS;
break;
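Both new VCPUOPs are restricted to dom0, so the expected caller is the dom0 vGT driver going through the ordinary vcpu_op hypercall. A hedged sketch of that call path (struct vcpu_emul_ioreq is defined elsewhere in this patch, so its fields are not shown here):

    /* Hypothetical dom0 (Linux) wrapper: ask Xen to emulate one I/O request
     * on behalf of the vGT driver. HYPERVISOR_vcpu_op(cmd, vcpuid, extra) is
     * the standard pvops hypercall wrapper; VCPUOP_request_io_emulation and
     * struct vcpu_emul_ioreq come from this patch's public headers. */
    static int vgt_request_io_emulation(int vcpu, struct vcpu_emul_ioreq *req)
    {
        return HYPERVISOR_vcpu_op(VCPUOP_request_io_emulation, vcpu, req);
    }
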
diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c
index 8365f32..ef91981 100644
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -35,6 +35,7 @@
#include <asm/setup.h>
#include <asm/bzimage.h> /* for bzimage_parse */
#include <asm/io_apic.h>
+#include <asm/vgt.h>
#include <public/version.h>
diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index e75918a..f0bd9ee 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -35,6 +35,7 @@
#include <asm/mem_sharing.h>
#include <asm/xstate.h>
#include <asm/debugger.h>
+#include <asm/vgt.h>
static int gdbsx_guest_mem_io(
domid_t domid, struct xen_domctl_gdbsx_memio *iop)
@@ -641,7 +642,8 @@ long arch_do_domctl(
if ( ret )
break;
- if ( add )
+ ret = 0;
+ if ( add == DPCI_ADD_MAPPING )
{
printk(XENLOG_G_INFO
"memory_map:add: dom%d gfn=%lx mfn=%lx nr=%lx\n",
@@ -1223,6 +1225,19 @@ long arch_do_domctl(
}
break;
+ case XEN_DOMCTL_vgt_io_trap:
+ {
+ struct xen_domctl_vgt_io_trap *info = &domctl->u.vgt_io_trap;
+
+ if (!info->n_pio || !info->n_mmio)
+ copyback = 1;
+
+ ret = vgt_io_trap(d, info);
+ if (ret)
+ break;
+ }
+ break;
+
default:
ret = iommu_do_domctl(domctl, d, u_domctl);
break;
diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile
index eea5555..765a09f 100644
--- a/xen/arch/x86/hvm/Makefile
+++ b/xen/arch/x86/hvm/Makefile
@@ -22,4 +22,5 @@ obj-y += vlapic.o
obj-y += vmsi.o
obj-y += vpic.o
obj-y += vpt.o
-obj-y += vpmu.o
\ No newline at end of file
+obj-y += vpmu.o
+obj-y += vgt.o
diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index b206997..44877fa 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -61,6 +61,7 @@ static int hvmemul_do_io(
unsigned long ram_gfn = paddr_to_pfn(ram_gpa);
p2m_type_t p2mt;
struct page_info *ram_page;
+ int do_mmio_split = (is_mmio && !value_is_ptr && size > sizeof(p->data));
int rc;
/* Check for paged out page */
@@ -83,25 +84,62 @@ static int hvmemul_do_io(
* Weird-sized accesses have undefined behaviour: we discard writes
* and read all-ones.
*/
- if ( unlikely((size > sizeof(long)) || (size & (size - 1))) )
+ if ( unlikely((!do_mmio_split && size > sizeof(long))
+ || (do_mmio_split && size > MAX_INS_EMULATE_MMIO_SIZE)
+ || (size & (size - 1))) )
{
gdprintk(XENLOG_WARNING, "bad mmio size %d\n", size);
ASSERT(p_data != NULL); /* cannot happen with a REP prefix */
if ( dir == IOREQ_READ )
memset(p_data, ~0, size);
- if ( ram_page )
- put_page(ram_page);
- return X86EMUL_UNHANDLEABLE;
+ goto out_unhandleable;
+ }
+
+ vio = &curr->arch.hvm_vcpu.hvm_io;
+
+ if ( unlikely(do_mmio_split) )
+ {
+ if ( !vio->mmio_split )
+ {
+ /* Beginning of a split MMIO emulation. */
+ if ( vio->io_state != HVMIO_none )
+ {
+ gdprintk(XENLOG_WARNING, "Split MMIO emulation:\n");
+ gdprintk(XENLOG_WARNING, "Invalid IO state when trying to begin a split MMIO emulation.\n");
+ goto out_unhandleable;
+ }
+
+ vio->mmio_split = 1;
+
+ vio->mmio_split_pa = addr;
+ vio->mmio_split_size = sizeof(p->data);
+ vio->mmio_split_done_size = 0;
+ vio->mmio_split_dir = dir;
+
+ /* Load write buffer. */
+ if ( dir == IOREQ_WRITE )
+ memcpy(vio->mmio_split_buf, p_data, vio->io_size);
+ }
}
if ( (p_data != NULL) && (dir == IOREQ_WRITE) )
{
- memcpy(&value, p_data, size);
+ /*
+ * We enter here in two cases:
+ * - at the beginning of a split MMIO emulation:
+ * load the value from the write buffer for the first-round ioreq_t;
+ * - at the end of a split MMIO emulation:
+ * there is nothing extra to do.
+ */
+ if ( unlikely(vio->mmio_split && vio->mmio_split_done_size != vio->io_size) )
+ memcpy(&value, vio->mmio_split_buf + vio->mmio_split_done_size,
+ vio->mmio_split_size);
+ else
+ memcpy(&value, p_data, size);
+
p_data = NULL;
}
- vio = &curr->arch.hvm_vcpu.hvm_io;
-
if ( is_mmio && !value_is_ptr )
{
/* Part of a multi-cycle read or write? */
@@ -137,7 +175,7 @@ static int hvmemul_do_io(
break;
case HVMIO_completed:
vio->io_state = HVMIO_none;
- if ( p_data == NULL )
+ if ( p_data == NULL && !vio->mmio_split )
{
if ( ram_page )
put_page(ram_page);
@@ -173,14 +211,30 @@ static int hvmemul_do_io(
(p_data == NULL) ? HVMIO_dispatched : HVMIO_awaiting_completion;
vio->io_size = size;
+ if ( !vio->mmio_split )
+ {
+ p->data_is_ptr = value_is_ptr;
+ p->type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO;
+ p->size = size;
+ p->addr = addr;
+ p->count = *reps;
+ }
+ else
+ {
+ /*
+ * The first ioreq_t of a split MMIO emulation.
+ */
+ p->data_is_ptr = 0;
+ p->type = IOREQ_TYPE_COPY;
+ p->size = vio->mmio_split_size;
+ p->addr = vio->mmio_split_pa;
+ p->count = 1;
+ }
+
p->dir = dir;
- p->data_is_ptr = value_is_ptr;
- p->type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO;
- p->size = size;
- p->addr = addr;
- p->count = *reps;
p->df = df;
p->data = value;
+ p->is_vgt = 0;
if ( dir == IOREQ_WRITE )
hvmtrace_io_assist(is_mmio, p);
@@ -209,7 +263,7 @@ static int hvmemul_do_io(
rc = X86EMUL_RETRY;
if ( !hvm_send_assist_req(curr) )
vio->io_state = HVMIO_none;
- else if ( p_data == NULL )
+ else if ( p_data == NULL && !vio->mmio_split )
rc = X86EMUL_OKAY;
break;
default:
@@ -227,8 +281,30 @@ static int hvmemul_do_io(
if ( dir == IOREQ_READ )
hvmtrace_io_assist(is_mmio, p);
- if ( p_data != NULL )
- memcpy(p_data, &vio->io_data, size);
+ if ( unlikely(vio->mmio_split) )
+ {
+ if ( vio->io_size != vio->mmio_split_done_size )
+ {
+ gdprintk(XENLOG_WARNING, "Split MMIO emulation:\n");
+ gdprintk(XENLOG_WARNING, "Finish a uncompleted split MMIO emulation.\n");
+ gdprintk(XENLOG_WARNING, "vio->io_size %d, vio->mmio_split_done_size %d.\n",
+ vio->io_size, vio->mmio_split_done_size);
+
+ vio->mmio_split = 0;
+ goto out_unhandleable;
+ }
+
+ if ( vio->mmio_split_dir == IOREQ_READ )
+ memcpy(p_data, vio->mmio_split_buf, size);
+
+ /* This is the end of a split MMIO emulation. */
+ vio->mmio_split = 0;
+ }
+ else
+ {
+ if ( p_data != NULL )
+ memcpy(p_data, &vio->io_data, size);
+ }
if ( is_mmio && !value_is_ptr )
{
@@ -261,6 +337,11 @@ static int hvmemul_do_io(
if ( ram_page )
put_page(ram_page);
return X86EMUL_OKAY;
+
+out_unhandleable:
+ if ( ram_page )
+ put_page(ram_page);
+ return X86EMUL_UNHANDLEABLE;
}
int hvmemul_do_pio(
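The mmio_split_* fields used throughout hvmemul_do_io() are added to struct hvm_vcpu_io in a header hunk not shown in this excerpt. A sketch of what the code above implies about them (field names taken from usage, declaration details assumed):

    /* Presumed additions to struct hvm_vcpu_io: bookkeeping for one MMIO
     * access wider than ioreq_t.data, replayed as several smaller rounds.
     * MAX_INS_EMULATE_MMIO_SIZE is defined elsewhere in this patch. */
    struct mmio_split_state_sketch {
        int           mmio_split;           /* a split emulation is in flight */
        paddr_t       mmio_split_pa;        /* guest-physical start address */
        unsigned int  mmio_split_size;      /* bytes per round: sizeof(p->data) */
        unsigned int  mmio_split_done_size; /* bytes completed so far */
        int           mmio_split_dir;       /* IOREQ_READ or IOREQ_WRITE */
        uint8_t       mmio_split_buf[MAX_INS_EMULATE_MMIO_SIZE]; /* staging */
    };
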
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 4cf503b..e0700f9 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -59,6 +59,7 @@
#include <asm/hvm/nestedhvm.h>
#include <asm/mtrr.h>
#include <asm/apic.h>
+#include <asm/vgt.h>
#include <public/sched.h>
#include <public/hvm/ioreq.h>
#include <public/version.h>
@@ -357,7 +358,8 @@ void hvm_do_resume(struct vcpu *v)
break;
case STATE_IOREQ_READY: /* IOREQ_{READY,INPROCESS} -> IORESP_READY */
case STATE_IOREQ_INPROCESS:
- wait_on_xen_event_channel(v->arch.hvm_vcpu.xen_port,
+ wait_on_xen_event_channel(p->is_vgt ? v->arch.hvm_vcpu.vgt_port :
+ v->arch.hvm_vcpu.xen_port,
(p->state != STATE_IOREQ_READY) &&
(p->state != STATE_IOREQ_INPROCESS));
break;
@@ -524,6 +526,7 @@ int hvm_domain_initialise(struct domain *d)
spin_lock_init(&d->arch.hvm_domain.pbuf_lock);
spin_lock_init(&d->arch.hvm_domain.irq_lock);
spin_lock_init(&d->arch.hvm_domain.uc_lock);
+ spin_lock_init(&d->arch.hvm_domain.vgt_wp_hash_lock);
INIT_LIST_HEAD(&d->arch.hvm_domain.msixtbl_list);
spin_lock_init(&d->arch.hvm_domain.msixtbl_list_lock);
@@ -612,6 +615,7 @@ void hvm_domain_destroy(struct domain *d)
rtc_deinit(d);
stdvga_deinit(d);
vioapic_deinit(d);
+ vgt_hvm_deinit(d);
hvm_destroy_cacheattr_region_list(d);
}
@@ -1094,6 +1098,119 @@ static int __init __hvm_register_CPU_XSAVE_save_and_restore(void)
}
__initcall(__hvm_register_CPU_XSAVE_save_and_restore);
+static int hvm_mmio_split_check_status(struct vcpu *v)
+{
+ struct hvm_vcpu_io *vio = &v->arch.hvm_vcpu.hvm_io;
+ ioreq_t *p = get_ioreq(v);
+
+ if ( p->state != STATE_IORESP_READY )
+ {
+ gdprintk(XENLOG_WARNING, "The state of ioreq isn't STATE_IORESP_READY.\n");
+ goto invalid;
+ }
+
+ if ( p->dir != vio->mmio_split_dir )
+ {
+ gdprintk(XENLOG_WARNING, "The direction of ioreq isn't same as mmio_split_dir.\n");
+ goto invalid;
+ }
+
+ if ( p->data_is_ptr
+ || p->type != IOREQ_TYPE_COPY
+ || p->size != vio->mmio_split_size
+ || p->count != 1 )
+ {
+ gdprintk(XENLOG_WARNING, "The configuration of ioreq is invalid.\n");
+ goto invalid;
+ }
+
+ if ( !test_bit(_VPF_blocked_in_xen, &v->pause_flags) )
+ {
+ gdprintk(XENLOG_WARNING, "The state of target VCPU isn't _VPF_blocked_in_xen.\n");
+ goto invalid;
+ }
+
+ return 1;
+
+invalid:
+ return 0;
+}
+
+static void hvm_io_notification_fn(struct vcpu *v, unsigned int port)
+{
+ struct hvm_vcpu_io *vio = &v->arch.hvm_vcpu.hvm_io;
+ struct hvm_ioreq_page *iorp = &v->domain->arch.hvm_domain.ioreq;
+ ioreq_t *p;
+
+ int mmio_split_finished;
+
+ if ( !vio->mmio_split )
+ {
+ /* Consumer needs notification only if blocked. */
+ if ( test_and_clear_bit(_VPF_blocked_in_xen, &v->pause_flags) )
+ vcpu_wake(v);
+
+ return;
+ }
+
+ spin_lock(&iorp->lock);
+
+ p = get_ioreq(v);
+
+ if ( !hvm_mmio_split_check_status(v) )
+ {
+ gdprintk(XENLOG_WARNING, "The status of split MMIO is invalid.\n");
+ vio->mmio_split = 0;
+
+ goto out_unlock;
+ }
+
+ if ( vio->mmio_split_dir == IOREQ_READ )
+ {
+ memcpy(vio->mmio_split_buf + vio->mmio_split_done_size,
+ &p->data, vio->mmio_split_size);
+
+ vio->mmio_split_done_size += vio->mmio_split_size;
+
+ mmio_split_finished = (vio->mmio_split_done_size == vio->io_size);
+ }
+ else
+ {
+ vio->mmio_split_done_size += vio->mmio_split_size;
+
+ mmio_split_finished = (vio->mmio_split_done_size == vio->io_size);
+
+ if ( !mmio_split_finished )
+ memcpy(&p->data, vio->mmio_split_buf + vio->mmio_split_done_size,
+ vio->mmio_split_size);
+ }
+
+ if ( !mmio_split_finished )
+ {
+ /* Trigger next round ioreq_t. */
+ p->state = STATE_IOREQ_READY;
+
+ p->addr = vio->mmio_split_pa + vio->mmio_split_done_size;
+
+ spin_unlock(&iorp->lock);
+
+ __notify_via_xen_event_channel(v->domain, port);
+
+ return;
+ }
+ else
+ {
+ /* Consumer needs notification only if blocked. */
+ if ( test_and_clear_bit(_VPF_blocked_in_xen, &v->pause_flags) )
+ vcpu_wake(v);
+ }
+
+out_unlock:
+ spin_unlock(&iorp->lock);
+
+ return;
+}
+
int hvm_vcpu_initialise(struct vcpu *v)
{
int rc;
@@ -1113,13 +1230,21 @@ int hvm_vcpu_initialise(struct vcpu *v)
goto fail3;
/* Create ioreq event channel. */
- rc = alloc_unbound_xen_event_channel(v, dm_domid, NULL);
+ rc = alloc_unbound_xen_event_channel(v, dm_domid, hvm_io_notification_fn);
if ( rc < 0 )
goto fail4;
/* Register ioreq event channel. */
v->arch.hvm_vcpu.xen_port = rc;
+ /* Create ioreq vgt event channel. */
+ rc = alloc_unbound_xen_event_channel(v, 0, hvm_io_notification_fn);
+ if ( rc < 0 )
+ goto fail4;
+
+ /* Register ioreq vgt event channel. */
+ v->arch.hvm_vcpu.vgt_port = rc;
+
if ( v->vcpu_id == 0 )
{
/* Create bufioreq event channel. */
@@ -1131,7 +1256,10 @@ int hvm_vcpu_initialise(struct vcpu *v)
spin_lock(&d->arch.hvm_domain.ioreq.lock);
if ( d->arch.hvm_domain.ioreq.va != NULL )
+ {
get_ioreq(v)->vp_eport = v->arch.hvm_vcpu.xen_port;
+ get_ioreq(v)->vgt_eport = v->arch.hvm_vcpu.vgt_port;
+ }
spin_unlock(&d->arch.hvm_domain.ioreq.lock);
spin_lock_init(&v->arch.hvm_vcpu.tm_lock);
@@ -1225,6 +1353,7 @@ void hvm_vcpu_down(struct vcpu *v)
bool_t hvm_send_assist_req(struct vcpu *v)
{
ioreq_t *p;
+ int port;
if ( unlikely(!vcpu_start_shutdown_deferral(v)) )
return 0; /* implicitly bins the i/o operation */
@@ -1238,14 +1367,16 @@ bool_t hvm_send_assist_req(struct vcpu *v)
return 0;
}
- prepare_wait_on_xen_event_channel(v->arch.hvm_vcpu.xen_port);
+ port = p->is_vgt ? v->arch.hvm_vcpu.vgt_port : v->arch.hvm_vcpu.xen_port;
+
+ prepare_wait_on_xen_event_channel(port);
/*
* Following happens /after/ blocking and setting up ioreq contents.
* prepare_wait_on_xen_event_channel() is an implicit barrier.
*/
p->state = STATE_IOREQ_READY;
- notify_via_xen_event_channel(v->domain, v->arch.hvm_vcpu.xen_port);
+ notify_via_xen_event_channel(v->domain, port);
return 1;
}
@@ -1458,7 +1589,8 @@ int hvm_hap_nested_page_fault(paddr_t gpa,
* to the mmio handler.
*/
if ( (p2mt == p2m_mmio_dm) ||
- (access_w && (p2mt == p2m_ram_ro)) )
+ (access_w && (p2mt == p2m_ram_ro)) ||
+ (p2mt == p2m_writeprotection) )
{
put_gfn(p2m->domain, gfn);
if ( !handle_mmio() )
@@ -3819,7 +3951,10 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) arg)
if ( iorp->va != NULL )
/* Initialise evtchn port info if VCPUs already created. */
for_each_vcpu ( d, v )
+ {
get_ioreq(v)->vp_eport = v->arch.hvm_vcpu.xen_port;
+ get_ioreq(v)->vgt_eport = v->arch.hvm_vcpu.vgt_port;
+ }
spin_unlock(&iorp->lock);
break;
case HVM_PARAM_BUFIOREQ_PFN:
@@ -3898,9 +4033,17 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) arg)
if ( rc )
break;
+ rc = hvm_replace_event_channel(v, a.value,
+ &v->arch.hvm_vcpu.vgt_port);
+ if ( rc )
+ break;
+
spin_lock(&iorp->lock);
if ( iorp->va != NULL )
+ {
get_ioreq(v)->vp_eport = v->arch.hvm_vcpu.xen_port;
+ get_ioreq(v)->vgt_eport = v->arch.hvm_vcpu.vgt_port;
+ }
spin_unlock(&iorp->lock);
}
domain_unpause(d);
@@ -4398,6 +4541,84 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) arg)
break;
}
+ case HVMOP_vgt_wp_pages: {
+ xen_hvm_vgt_wp_pages_t wp;
+ struct domain *d;
+
+ if ( copy_from_guest(&wp, arg, 1 ) )
+ return -EFAULT;
+
+ if ( wp.nr_pages >= MAX_WP_BATCH_PAGES )
+ return -EFAULT;
+
+ d = rcu_lock_domain_by_any_id(wp.domid);
+ if ( d == NULL )
+ return -ESRCH;
+
+ if ( !is_hvm_domain(d) ) {
+ rc = -EINVAL;
+ }
+ else {
+ rc = hap_write_protect_pages(d, wp.wp_pages, wp.nr_pages, wp.set);
+ }
+ rcu_unlock_domain(d);
+
+ break;
+ }
+
+ case HVMOP_vgt_map_mmio: {
+ xen_hvm_vgt_map_mmio_t memmap;
+ struct domain *d;
+
+ if ( copy_from_guest(&memmap, arg, 1 ) )
+ return -EFAULT;
+
+ d = rcu_lock_domain_by_id(memmap.domid);
+ if ( d == NULL )
+ return -ESRCH;
+
+ if ( !is_hvm_domain(d) ) {
+ rc = -EINVAL;
+ goto param_fail_vgt_map_mmio;
+ }
+
+ if ( memmap.map )
+ vgt_domctl_add_range(d, memmap.first_gfn,
+ memmap.first_mfn, memmap.nr_mfns);
+ else
+ vgt_domctl_remove_range(d, memmap.first_gfn,
+ memmap.first_mfn, memmap.nr_mfns);
+
+ param_fail_vgt_map_mmio:
+ rcu_unlock_domain(d);
+
+ break;
+ }
+
+ case HVMOP_vgt_enable: {
+ xen_hvm_vgt_enable_t vgt_enable;
+ struct domain *d;
+
+ if ( copy_from_guest(&vgt_enable, arg, 1 ) )
+ return -EFAULT;
+
+ d = rcu_lock_domain_by_any_id(vgt_enable.domid);
+ if ( d == NULL )
+ return -ESRCH;
+
+ if ( !is_hvm_domain(d) ) {
+ rc = -EINVAL;
+ goto param_fail_vgt_enable;
+ }
+
+ rc = vgt_hvm_init(d);
+
+ param_fail_vgt_enable:
+ rcu_unlock_domain(d);
+
+ break;
+ }
+
case HVMOP_xentrace: {
xen_hvm_xentrace_t tr;
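The HVMOP_vgt_* operations above are reached from dom0 through the ordinary hvm_op hypercall. A hedged sketch of the dom0 (Linux kernel) side, assuming only the .domid field that the handler above reads from xen_hvm_vgt_enable_t:

    /* Hypothetical dom0 caller: switch a guest into vGT mode.
     * HVMOP_vgt_enable and xen_hvm_vgt_enable_t are defined by this patch;
     * HYPERVISOR_hvm_op(op, arg) is the standard Linux pvops wrapper. */
    static int vgt_enable_domain(domid_t domid)
    {
        struct xen_hvm_vgt_enable arg = { .domid = domid };

        return HYPERVISOR_hvm_op(HVMOP_vgt_enable, &arg);
    }
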
diff --git a/xen/arch/x86/hvm/intercept.c b/xen/arch/x86/hvm/intercept.c
index 5bb1c17..d13cae4 100644
--- a/xen/arch/x86/hvm/intercept.c
+++ b/xen/arch/x86/hvm/intercept.c
@@ -22,6 +22,7 @@
#include <xen/types.h>
#include <xen/sched.h>
#include <asm/regs.h>
+#include <asm/vgt.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>
#include <asm/hvm/domain.h>
@@ -39,7 +40,9 @@ hvm_mmio_handlers[HVM_MMIO_HANDLER_NR] =
&vlapic_mmio_handler,
&vioapic_mmio_handler,
&msixtbl_mmio_handler,
- &iommu_mmio_handler
+ &iommu_mmio_handler,
+ &writeprotection_handler, /* write protection to guest pages */
+ &vgt_mmio_handler
};
static int hvm_mmio_access(struct vcpu *v,
@@ -224,6 +227,12 @@ int hvm_io_intercept(ioreq_t *p, int type)
int i;
unsigned long addr, size;
+ /*
+ * vGT memory accesses are high-frequency, so they need to be
+ * on the fast path. We add a hook here before the I/O chain
+ * walk. But 0xcf8/0xcfc is not in this list.
+ */
+
if ( type == HVM_PORTIO )
{
int rc = dpci_ioport_intercept(p);
diff --git a/xen/arch/x86/hvm/io.c b/xen/arch/x86/hvm/io.c
index 5f5009a..6d6d434 100644
--- a/xen/arch/x86/hvm/io.c
+++ b/xen/arch/x86/hvm/io.c
@@ -284,6 +284,15 @@ void hvm_io_assist(void)
memcpy(&guest_cpu_user_regs()->eax,
&p->data, vio->io_size);
break;
+ case HVMIO_dispatched:
+ if ( vio->mmio_split )
+ {
+ /* Emulate and finish split MMIO write. */
+ vio->io_state = HVMIO_completed;
+ vio->io_data = p->data;
+ (void)handle_mmio();
+ }
+ break;
default:
break;
}
diff --git a/xen/arch/x86/hvm/vgt.c b/xen/arch/x86/hvm/vgt.c
new file mode 100644
index 0000000..61cb09b
--- /dev/null
+++ b/xen/arch/x86/hvm/vgt.c
@@ -0,0 +1,234 @@
+/*
+ * vgt.c: code to trace vGT MMIO accesses in an HVM domain
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/mm.h>
+#include <xen/xmalloc.h>
+#include <xen/domain.h>
+#include <xen/domain_page.h>
+#include <xen/event.h>
+#include <xen/trace.h>
+#include <xen/lib.h>
+#include <xen/iocap.h>
+#include <xen/sched.h>
+#include <asm/current.h>
+#include <asm/page.h>
+#include <asm/apic.h>
+#include <asm/hvm/hvm.h>
+#include <asm/hvm/io.h>
+#include <asm/hvm/support.h>
+#include <asm/p2m.h>
+#include <public/hvm/ioreq.h>
+#include <public/hvm/params.h>
+
+static int vgt_mmio_range(struct vcpu *v, unsigned long addr)
+{
+ return vgt_enabled(v->domain) &&
+ rangeset_contains_singleton(
+ v->domain->iomem_forward_caps, addr);
+}
+
+static int _vgt_mmio_read_write(struct vcpu *v)
+{
+ get_ioreq(v)->is_vgt = 1;
+
+ return X86EMUL_UNHANDLEABLE; /* notify the dom0 vGT driver to handle it */
+}
+
+static int vgt_mmio_read(
+ struct vcpu *v, unsigned long gpa,
+ unsigned long bytes, unsigned long *p_data)
+{
+ return _vgt_mmio_read_write(v);
+}
+
+static int vgt_mmio_write(struct vcpu *v, unsigned long gpa,
+ unsigned long bytes, unsigned long data)
+{
+ return _vgt_mmio_read_write(v);
+}
+
+const struct hvm_mmio_handler vgt_mmio_handler = {
+ .check_handler = vgt_mmio_range,
+ .read_handler = vgt_mmio_read,
+ .write_handler = vgt_mmio_write
+};
+
+/*
+ * Check whether the fault address falls in a write-protected page.
+ * Return 1: yes, 0: no.
+ */
+static int writeprotection_page(struct vcpu *v, unsigned long addr)
+{
+ unsigned long gpfn = addr >> PAGE_SHIFT;
+ struct wp_hash_table *he;
+
+ if (!vgt_enabled(v->domain))
+ return 0;
+
+ spin_lock(&wp_htable_lock(v->domain));
+ he = lookup_wp_hash_table(wp_htable(v->domain), gpfn);
+ spin_unlock(&wp_htable_lock(v->domain));
+
+ return (he != NULL);
+}
+
+const struct hvm_mmio_handler writeprotection_handler = {
+ .check_handler = writeprotection_page,
+ .read_handler = vgt_mmio_read,
+ .write_handler = vgt_mmio_write
+};
+
+
+int vgt_wp_hash_add(struct wp_hash_table *wp_ht, unsigned long gpfn)
+{
+ int index = wp_hash(gpfn);
+ struct wp_hash_table *ne;
+
+ if ( lookup_wp_hash_table(wp_ht, gpfn) != NULL )
+ return -EINVAL;
+ if (wp_ht[index].gpfn == WP_INVALID_GPFN)
+ wp_ht[index].gpfn = gpfn;
+ else {
+ /* self-atomicity: the chain is updated with a single pointer store */
+ ne = xmalloc_bytes(sizeof(wp_ht[0]));
+ if ( ne == NULL )
+ return -ENOMEM;
+ ne->next = wp_ht[index].next;
+ ne->gpfn = gpfn;
+ wp_ht[index].next = ne;
+ }
+ return 0;
+}
+
+int vgt_wp_hash_rem(struct wp_hash_table *wp_ht, unsigned long gpfn)
+{
+ int index = wp_hash(gpfn);
+ struct wp_hash_table *next, *prev;
+
+ if (wp_ht[index].gpfn == gpfn) {
+ wp_ht[index].gpfn = WP_INVALID_GPFN;
+ }
+ else {
+ prev = &wp_ht[index];
+ while (1) {
+ next=prev->next;
+ if ( next == NULL ) {
+ printk("vgt_wp_hash_rem hash_table %p remove %lx not found\n",
+ wp_ht, gpfn);
+ return -EINVAL;
+ }
+ if (next->gpfn == gpfn) {
+ /* self-atomicity: unlink with a single pointer store */
+ prev->next = next->next;
+ xfree (next);
+ break;
+ }
+ prev = next;
+ }
+ }
+ return 0;
+}
+
+struct wp_hash_table *lookup_wp_hash_table(
+ struct wp_hash_table *wp_ht,
+ unsigned long gpfn)
+{
+ int index = wp_hash(gpfn);
+ struct wp_hash_table *entry;
+
+ for ( entry=&wp_ht[index]; entry!=NULL; entry=entry->next) {
+ if (entry->gpfn == gpfn)
+ break;
+ }
+ return entry;
+}
+
+static void free_vgt_wp_hash_chain(struct wp_hash_table *chain)
+{
+ struct wp_hash_table *p = chain;
+ struct wp_hash_table *n;
+
+ while (p) {
+ n = p->next;
+ xfree (p);
+ p = n;
+ }
+}
+
+void free_vgt_wp_hash(struct wp_hash_table *wp_ht)
+{
+ int i;
+
+ for ( i = 0; i < WP_HASH_SIZE; i++ )
+ if ( wp_ht[i].next != NULL )
+ free_vgt_wp_hash_chain (wp_ht[i].next);
+}
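/*
 * Illustrative usage sketch (not part of the patch): the life cycle of
 * the write-protection hash above. The table is an open-chained array
 * of WP_HASH_SIZE buckets; gpfn 0 doubles as the empty-bucket marker
 * (WP_INVALID_GPFN), so gpfn 0 itself cannot be tracked. The caller is
 * expected to hold wp_htable_lock(), as hap_write_protect_pages() does.
 */
#if 0
static void wp_hash_example(struct domain *d, unsigned long gpfn)
{
    spin_lock(&wp_htable_lock(d));

    /* Track the page: faults on it are then routed to the vGT driver. */
    if ( vgt_wp_hash_add(wp_htable(d), gpfn) == 0 )
    {
        ASSERT(lookup_wp_hash_table(wp_htable(d), gpfn) != NULL);

        /* Stop tracking it when done. */
        vgt_wp_hash_rem(wp_htable(d), gpfn);
    }

    spin_unlock(&wp_htable_lock(d));
}
#endif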
+
+/* ret:
+ 0 - success
+ negative - failure
+*/
+int vgt_domctl_add_range(struct domain *d, unsigned long gfn,
+ unsigned long mfn, unsigned long nr_mfns)
+{
+ unsigned long hva;
+ int ret, i;
+
+ printk("DPCI_ADD_MAPPING_VGT : domid=%d gfn=0x%lx mfn=0x%lx nr_mfns=0x%lx\n",
+ d->domain_id, gfn, mfn, nr_mfns );
+
+ /* direct map without trap & emulation */
+ ret = iomem_permit_access(d, mfn, mfn + nr_mfns - 1);
+ for ( i = 0; i < nr_mfns; i++ )
+ set_mmio_p2m_entry(d, gfn+i, _mfn(mfn+i));
+
+ hva = (unsigned long) mfn_to_virt(mfn);
+ ret = map_pages_to_xen(hva, mfn, nr_mfns, PAGE_HYPERVISOR_NOCACHE);
+ if (ret != 0)
+ {
+ printk("Warning: mapping domain page error\n");
+ domain_crash(current->domain);
+ }
+
+ return ret;
+}
+
+/* ret:
+ 0 - success
+ negative - failure
+*/
+int vgt_domctl_remove_range(struct domain *d, unsigned long gfn,
+ unsigned long mfn, unsigned long nr_mfns)
+{
+ int i, ret;
+ /*
+ remove the pfn and mfn record for MMIO trap and emulation
+ */
+ printk("DPCI_REMOVE_MAPPING_VGT : domid=%d gfn=%lx mfn=%lx nr_mfns=%lx\n",
+ d->domain_id, gfn, mfn, nr_mfns);
+
+ for ( i = 0; i < nr_mfns; i++ )
+ clear_mmio_p2m_entry(d, gfn+i);
+ ret = iomem_deny_access(d, mfn, mfn + nr_mfns - 1);
+ destroy_xen_mappings( (unsigned long)mfn_to_virt(mfn),
+ (unsigned long)mfn_to_virt(mfn + nr_mfns - 1) );
+ return ret;
+}
diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c
index e8aa61c..5225037 100644
--- a/xen/arch/x86/hvm/vmsi.c
+++ b/xen/arch/x86/hvm/vmsi.c
@@ -34,6 +34,7 @@
#include <xen/sched.h>
#include <xen/irq.h>
#include <public/hvm/ioreq.h>
+#include <public/vcpu.h>
#include <asm/hvm/io.h>
#include <asm/hvm/vpic.h>
#include <asm/hvm/vlapic.h>
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 2ed25c7..eec13cf 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -1273,7 +1273,8 @@ void vmx_do_resume(struct vcpu *v)
* 2: execute wbinvd on all dirty pCPUs when guest wbinvd exits.
* If VT-d engine can force snooping, we don't need to do these.
*/
- if ( has_arch_pdevs(v->domain) && !iommu_snoop
+ if ( ((has_arch_pdevs(v->domain) && !iommu_snoop) ||
+ vgt_enabled(v->domain))
&& !cpu_has_wbinvd_exiting )
{
int cpu = v->arch.hvm_vmx.active_cpu;
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 0e5d3b4..6412e8c 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -2140,10 +2140,8 @@ static void wbinvd_ipi(void *info)
static void vmx_wbinvd_intercept(void)
{
- if ( !has_arch_mmios(current->domain) )
- return;
-
- if ( iommu_snoop )
+ if ( (!has_arch_mmios(current->domain) || iommu_snoop) &&
+ !vgt_enabled(current->domain) )
return;
if ( cpu_has_wbinvd_exiting )
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 91d2c2b..3b907bd 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1019,6 +1019,20 @@ get_page_from_l4e(
} while ( 0 )
#endif
+/*
+ * No need to remove reserved bits when a pte is marked non-present,
+ * since a non-present to present change normally implies a
+ * completely new set of bits.
+ */
+#define adjust_guest_l1e_rsvd(pl1e, d, mfn) \
+ do { \
+ if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) && \
+ iomem_forward_permitted(d, mfn, mfn) ) \
+ { \
+ l1e_add_rsvd((pl1e), _PAGE_FORWARD); \
+ } \
+ } while ( 0 )
+
#define adjust_guest_l2e(pl2e, d) \
do { \
if ( likely(l2e_get_flags((pl2e)) & _PAGE_PRESENT) && \
@@ -1211,7 +1225,14 @@ static int alloc_l1_table(struct page_info *page)
break;
}
+ if ( l1e_get_rsvd(pl1e[i]) & (_PAGE_FORWARD|_PAGE_PRESENT) )
+ {
+ MEM_LOG("Captured guest reserved bit usage, which is conflicting with Xen!");
+ goto fail;
+ }
+
adjust_guest_l1e(pl1e[i], d);
+ adjust_guest_l1e_rsvd(pl1e[i], d, l1e_get_pfn(pl1e[i]));
}
unmap_domain_page(pl1e);
@@ -1702,10 +1723,37 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e,
return -EINVAL;
}
+ if ( l1e_has_changed_rsvd(ol1e, nl1e, _PAGE_FORWARD) )
+ {
+ /*
+ * Check whether the guest itself uses the reserved bit.
+ * We assume reserved bits are used only while the P bit is
+ * cleared, e.g. for swap metadata.
+ */
+ if ( !(l1e_get_rsvd(ol1e) & _PAGE_FORWARD) &&
+ (l1e_get_rsvd(nl1e) & _PAGE_FORWARD) )
+ {
+ MEM_LOG("Captured guest reserved bit usage, which is conflicting with Xen!");
+ return -EINVAL;
+ }
+
+ /*
+ * The guest may not compose the new entry based on the old
+ * content, and thus Xen-populated reserved bits may be lost.
+ * Warn about such a case, but it should be fine, since we
+ * adjust the l1e anyway to enforce the forwarding requirement.
+ */
+ if ( ((l1e_get_rsvd(ol1e) & (_PAGE_FORWARD|_PAGE_PRESENT)) ==
+ (_PAGE_FORWARD|_PAGE_PRESENT)) &&
+ !(l1e_get_rsvd(nl1e) & _PAGE_FORWARD) )
+ MEM_LOG("Guest tempts to clear forwarding bit set by Xen!");
+ }
+
/* Fast path for identical mapping, r/w and presence. */
if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) )
{
adjust_guest_l1e(nl1e, pt_dom);
+ adjust_guest_l1e_rsvd(nl1e, pt_dom, l1e_get_pfn(nl1e));
if ( UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu,
preserve_ad) )
{
@@ -1735,6 +1783,7 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e,
put_page(page);
adjust_guest_l1e(nl1e, pt_dom);
+ adjust_guest_l1e_rsvd(nl1e, pt_dom, l1e_get_pfn(nl1e));
if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu,
preserve_ad)) )
{
@@ -4678,6 +4727,61 @@ static int xenmem_add_to_physmap(struct domain *d,
return xenmem_add_to_physmap_once(d, xatp);
}
+static int get_mfn_from_pfn(XEN_GUEST_HANDLE(xen_get_mfn_from_pfn_t) arg)
+{
+ struct xen_get_mfn_from_pfn cmd_info;
+ struct domain *d;
+ int rc=0, i;
+ xen_pfn_t *pfns = NULL;
+ xen_pfn_t pfn;
+ p2m_type_t t;
+
+ if ( !is_hardware_domain(current->domain) )
+ return -EPERM;
+
+ if ( copy_from_guest(&cmd_info, arg, 1) )
+ return -EFAULT;
+
+ d = rcu_lock_domain_by_any_id(cmd_info.domid);
+ if ( d == NULL )
+ return -ESRCH;
+
+ /* Bound the request as a basic sanity/security check. */
+ if ( cmd_info.nr_pfns > 2048 )
+ {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ pfns = xmalloc_array(xen_pfn_t, cmd_info.nr_pfns);
+ if ( pfns == NULL )
+ {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ if (copy_from_guest(pfns, cmd_info.pfn_list, cmd_info.nr_pfns)){
+ rc = -EFAULT;
+ goto out;
+ }
+
+ for ( i = 0; i < cmd_info.nr_pfns; i++ )
+ {
+ pfn = pfns[i];
+ pfns[i] = mfn_x(get_gfn_query(d, pfn, &t));
+ /* Drop the gfn reference before any early exit. */
+ put_gfn(d, pfn);
+ if ( pfns[i] == INVALID_MFN )
+ {
+ rc = -EINVAL;
+ goto out;
+ }
+ }
+
+ if (copy_to_guest(cmd_info.pfn_list, pfns, cmd_info.nr_pfns)){
+ rc = -EFAULT;
+ goto out;
+ }
+
+out:
+ rcu_unlock_domain(d);
+ xfree(pfns);
+ return rc;
+}
+
long arch_memory_op(int op, XEN_GUEST_HANDLE_PARAM(void) arg)
{
int rc;
@@ -4936,6 +5040,15 @@ long arch_memory_op(int op, XEN_GUEST_HANDLE_PARAM(void) arg)
return rc;
}
+#ifdef __x86_64__
+ case XENMEM_get_sharing_freed_pages:
+ return mem_sharing_get_nr_saved_mfns();
+#endif
+
+ case XENMEM_get_mfn_from_pfn:
+ rc = get_mfn_from_pfn(guest_handle_cast(arg, xen_get_mfn_from_pfn_t));
+ break;
+
default:
return subarch_memory_op(op, arg);
}
@@ -5063,7 +5176,28 @@ static int ptwr_emulated_update(
break;
}
+ if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
+ {
+ ol1e = l1e_from_intpte(old);
+ /* same check as the comment in mod_l1_entry */
+ if ( l1e_has_changed_rsvd(ol1e, nl1e, _PAGE_FORWARD) )
+ {
+ if ( !(l1e_get_rsvd(ol1e) & _PAGE_FORWARD) &&
+ (l1e_get_rsvd(nl1e) & _PAGE_FORWARD) )
+ {
+ MEM_LOG("Captured guest reserved bit usage, which is conflicting with Xen!");
+ return X86EMUL_UNHANDLEABLE;
+ }
+
+ if ( ((l1e_get_rsvd(ol1e) & (_PAGE_FORWARD|_PAGE_PRESENT)) ==
+ (_PAGE_FORWARD|_PAGE_PRESENT)) &&
+ !(l1e_get_rsvd(nl1e) & _PAGE_FORWARD) )
+ MEM_LOG("Guest tempts to clear forwarding bit set by Xen!");
+ }
+ }
+
adjust_guest_l1e(nl1e, d);
+ adjust_guest_l1e_rsvd(nl1e, d, l1e_get_pfn(nl1e));
/* Checked successfully: do the update (write or cmpxchg). */
pl1e = map_domain_page(mfn);
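/*
 * Illustrative sketch (not part of the patch): how a dom0 vGT driver
 * might invoke the XENMEM_get_mfn_from_pfn op implemented above. The
 * struct layout matches the xen/include/public/memory.h hunk later in
 * this patch; the HYPERVISOR_memory_op stub is the usual guest-side
 * hypercall wrapper and is assumed here, not defined by this patch.
 */
#if 0
static int query_mfns(domid_t domid, xen_pfn_t *pfns, unsigned int nr)
{
    struct xen_get_mfn_from_pfn cmd = {
        .nr_pfns = nr,    /* must not exceed 2048, see the bound above */
        .domid   = domid,
    };

    /* IN: guest pfns; OUT: overwritten with the translated mfns. */
    set_xen_guest_handle(cmd.pfn_list, pfns);

    return HYPERVISOR_memory_op(XENMEM_get_mfn_from_pfn, &cmd);
}
#endif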
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index bff05d9..5b89c0c 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -208,6 +208,46 @@ void hap_logdirty_init(struct domain *d)
hap_clean_dirty_bitmap);
}
+/*
+ * Write-protect guest pages.
+ * Write protection may be added to or removed from a guest page,
+ * to track guest write operations.
+ * This was initially designed for GPU PPGTT page table pages.
+ * gpfn: base of the array containing the gpfns to operate on.
+ * num: size of the array.
+ * wr: 1 to add write protection, 0 to remove it.
+ */
+int hap_write_protect_pages(struct domain *d,
+ unsigned long *gpfn, int num, int wr)
+{
+ int i;
+ p2m_type_t ot, nt;
+ int ret = 0;
+
+ if ( wr ) {
+ ot = p2m_ram_rw;
+ nt = p2m_writeprotection;
+ } else {
+ ot = p2m_writeprotection;
+ nt = p2m_ram_rw;
+ }
+
+ spin_lock(&wp_htable_lock(d));
+
+ for ( i=0; i < num; i++) {
+ p2m_change_type_range(d, gpfn[i], gpfn[i] + 1, ot, nt);
+ if (wr)
+ ret = vgt_wp_hash_add(wp_htable(d), gpfn[i]);
+ else
+ ret = vgt_wp_hash_rem(wp_htable(d), gpfn[i]);
+ }
+
+ spin_unlock(&wp_htable_lock(d));
+
+ flush_tlb_mask(d->domain_dirty_cpumask);
+ return ret;
+}
+
/************************************************/
/* HAP SUPPORT FUNCTIONS */
/************************************************/
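/*
 * Illustrative usage sketch (not part of the patch): write-protecting
 * a batch of guest PPGTT page-table pages via hap_write_protect_pages()
 * above, then releasing them again. The gpfn values are hypothetical.
 */
#if 0
static void wp_ppgtt_example(struct domain *d)
{
    unsigned long gpfns[2] = { 0x1234, 0x1235 };

    /* Flip the pages from p2m_ram_rw to p2m_writeprotection. */
    hap_write_protect_pages(d, gpfns, 2, 1);

    /* ... guest writes now fault and reach the dom0 vGT driver ... */

    /* Restore normal read/write mappings. */
    hap_write_protect_pages(d, gpfns, 2, 0);
}
#endif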
diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
index 595c6e7..993e3a8 100644
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -69,6 +69,7 @@ static void ept_p2m_type_to_flags(ept_entry_t *entry, p2m_type_t type, p2m_acces
entry->mfn);
break;
case p2m_ram_logdirty:
+ case p2m_writeprotection:
case p2m_ram_ro:
case p2m_ram_shared:
entry->r = entry->x = 1;
diff --git a/xen/arch/x86/physdev.c b/xen/arch/x86/physdev.c
index 3733c7a..b27a5e4 100644
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -478,6 +478,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
ret = -EFAULT;
if ( copy_from_guest(&set_iopl, arg, 1) != 0 )
break;
+ printk("VGT: PHYSDEVOP_set_iopl iopl %x\n", set_iopl.iopl);
ret = -EINVAL;
if ( set_iopl.iopl > 3 )
break;
@@ -488,9 +489,11 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
case PHYSDEVOP_set_iobitmap: {
struct physdev_set_iobitmap set_iobitmap;
+
ret = -EFAULT;
if ( copy_from_guest(&set_iobitmap, arg, 1) != 0 )
break;
+ printk("VGT: PHYSDEVOP_set_iobitmap !!!\n");
ret = -EINVAL;
if ( !guest_handle_okay(set_iobitmap.bitmap, IOBMP_BYTES) ||
(set_iobitmap.nr_ports > 65536) )
diff --git a/xen/arch/x86/platform_hypercall.c b/xen/arch/x86/platform_hypercall.c
index 2162811..3e9a295 100644
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -30,6 +30,7 @@
#include <asm/mtrr.h>
#include <asm/io_apic.h>
#include <asm/setup.h>
+#include <asm/vgt.h>
#include "cpu/mtrr/mtrr.h"
#include <xsm/xsm.h>
@@ -601,6 +602,14 @@ ret_t do_platform_op(XEN_GUEST_HANDLE_PARAM(xen_platform_op_t) u_xenpf_op)
}
break;
+ case XENPF_set_vgt_info:
+ {
+ ret = set_vgt_info(op->u.vgt_info.gen_dev_bdf,
+ op->u.vgt_info.gen_dev_type);
+ break;
+ }
+ break;
+
default:
ret = -ENOSYS;
break;
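/*
 * Illustrative sketch (not part of the patch): how dom0 might tell Xen
 * which PCI device is the IGD and which GEN generation it is, via the
 * XENPF_set_vgt_info platform op dispatched above. The BDF encoding
 * (00:02.0, the usual IGD slot) and the HYPERVISOR_platform_op stub
 * are assumptions for illustration.
 */
#if 0
static int announce_igd(void)
{
    struct xen_platform_op op = {
        .cmd = XENPF_set_vgt_info,
        .interface_version = XENPF_INTERFACE_VERSION,
        .u.vgt_info = {
            .gen_dev_bdf  = (0 << 8) | (2 << 3) | 0, /* bus 0, dev 2, fn 0 */
            .gen_dev_type = XEN_IGD_HSW,
        },
    };

    return HYPERVISOR_platform_op(&op);
}
#endif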
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index adc5009..af51f9f 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -73,6 +73,7 @@
#include <asm/hpet.h>
#include <public/arch-x86/cpuid.h>
#include <xsm/xsm.h>
+#include <asm/vgt.h>
/*
* opt_nmi: one of 'ignore', 'dom0', or 'fatal'.
@@ -424,7 +425,7 @@ void fatal_trap(int trapnr, struct cpu_user_regs *regs)
(regs->eflags & X86_EFLAGS_IF) ? "" : ", IN INTERRUPT CONTEXT");
}
-static void do_guest_trap(
+void do_guest_trap(
int trapnr, const struct cpu_user_regs *regs, int use_error_code)
{
struct vcpu *v = current;
@@ -1343,6 +1344,11 @@ static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs)
return ret;
}
+ /* We use reserved bits to indicate a trapped MMIO access. */
+ if ( ((regs->error_code & (PFEC_page_present | PFEC_reserved_bit)) ==
+ (PFEC_page_present | PFEC_reserved_bit)) &&
+ do_rsvd_page_fault(v, addr, regs) )
+ return EXCRET_fault_fixed;
return 0;
}
@@ -1604,6 +1610,8 @@ static int guest_io_okay(
{
union { uint8_t bytes[2]; uint16_t mask; } x;
+ printk("VGT: =====: arch.pv_vcpu.iobmp_limit %x bitmap %lx\n",
+ v->arch.pv_vcpu.iobmp_limit, (unsigned long)v->arch.pv_vcpu.iobmp.p);
/*
* Grab permission bytes from guest space. Inaccessible bytes are
* read as 0xff (no access allowed).
@@ -1745,6 +1753,67 @@ static uint32_t guest_io_read(
return data;
}
+static bool_t vgt_cfg_space(unsigned int port, unsigned int bytes,
+ bool_t wr, struct vcpu *v, struct cpu_user_regs *regs)
+{
+ unsigned int _port;
+
+ if ( v->domain != dom0 )
+ return 0;
+
+ _port = port & ~3;
+ if ( _port == 0xCF8 && wr &&
+ CF8_to_BDF(regs->eax) == vgt_bdf ) {
+
+ ASSERT ((port & 3) == 0 && (bytes == 4));
+
+ /* VGT CFG space access, forward to vGT driver */
+ if ( ioports_forwarding_started(v->domain) ) {
+ /* Forward only after the guest is ready to process. */
+ v->domain->arch.pci_cf8 = regs->eax;
+ vgt_inject_virtual_exception (regs, 1);
+ return 1;
+ }
+ }
+ else if (_port == 0xCFC &&
+ CF8_to_BDF(v->domain->arch.pci_cf8) == vgt_bdf) {
+ if ( ioports_forwarding_started(v->domain) ) {
+ /* Forward only after the guest is ready to process. */
+ vgt_inject_virtual_exception (regs, 1);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static bool_t pio_forwarding(
+ unsigned int port, unsigned int bytes, int wr,
+ struct vcpu *v, struct cpu_user_regs *regs)
+{
+ int perm, rc=0;
+
+ if ( vgt_cfg_space(port, bytes, wr, v, regs) )
+ return 1;
+
+ perm = ioports_forwarding_permitted(v->domain, port, port ) << 1;
+ perm |= ioports_forwarding_permitted(v->domain,
+ port + bytes - 1, port + bytes - 1);
+ switch ( perm ) {
+ case 0:
+ break;
+ case 3: /* all ports are within the forwarding list */
+ vgt_inject_virtual_exception (regs, 1);
+ rc = 1;
+ break;
+ case 1:
+ case 2:
+ default:
+ ASSERT (0); /* TODO */
+ break;
+ }
+ return rc;
+}
+
static void guest_io_write(
unsigned int port, unsigned int bytes, uint32_t data,
struct vcpu *v, struct cpu_user_regs *regs)
@@ -2027,6 +2096,8 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
(rd_ad(edi) > (data_limit - (op_bytes - 1))) ||
!guest_io_okay(port, op_bytes, v, regs) )
goto fail;
+ if ( pio_forwarding(port, op_bytes, 0, v, regs) )
+ goto skip;
data = guest_io_read(port, op_bytes, v, regs);
if ( (rc = copy_to_user((void *)data_base + rd_ad(edi),
&data, op_bytes)) != 0 )
@@ -2053,9 +2124,14 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
+ op_bytes - rc, 0);
return EXCRET_fault_fixed;
}
- guest_io_write(port, op_bytes, data, v, regs);
- wr_ad(esi, regs->esi + (int)((regs->eflags & X86_EFLAGS_DF)
+ if ( pio_forwarding(port, op_bytes, 1, v, regs) )
+ goto skip;
+ else
+ {
+ guest_io_write(port, op_bytes, data, v, regs);
+ wr_ad(esi, regs->esi + (int)((regs->eflags & X86_EFLAGS_DF)
? -op_bytes : op_bytes));
+ }
break;
}
@@ -2110,6 +2186,8 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
exec_in:
if ( !guest_io_okay(port, op_bytes, v, regs) )
goto fail;
+ if ( pio_forwarding(port, op_bytes, 0, v, regs ) )
+ goto skip;
if ( admin_io_okay(port, op_bytes, v, regs) )
{
mark_regs_dirty(regs);
@@ -2140,6 +2218,8 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
exec_out:
if ( !guest_io_okay(port, op_bytes, v, regs) )
goto fail;
+ if ( pio_forwarding(port, op_bytes, 1, v, regs ) )
+ goto skip;
if ( admin_io_okay(port, op_bytes, v, regs) )
{
mark_regs_dirty(regs);
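/*
 * Illustrative note (not part of the patch): how the CF8_to_BDF()
 * macro used by vgt_cfg_space() above extracts bus:dev:fn from a
 * 0xCF8 config-address write. The example value is hypothetical.
 */
#if 0
/*
 * 0xCF8 layout: bit 31 = enable, bits 23:16 = bus, 15:11 = device,
 * 10:8 = function, 7:2 = register. CF8_to_BDF() keeps bits 23:8.
 */
unsigned int cf8 = 0x80001010;      /* bus 0, dev 2, fn 0, reg 0x10 */
unsigned int bdf = CF8_to_BDF(cf8); /* (cf8 >> 8) & 0xffff == 0x0010 */
#endif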
diff --git a/xen/arch/x86/vgt.c b/xen/arch/x86/vgt.c
new file mode 100644
index 0000000..688620f
--- /dev/null
+++ b/xen/arch/x86/vgt.c
@@ -0,0 +1,853 @@
+/*
+ * vgt.c: core logic to handle mediated GT passthrough
+ * Copyright (c) 2011, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/mm.h>
+#include <xen/sched.h>
+#include <xen/console.h>
+#include <xen/domain.h>
+#include <xen/domain_page.h>
+#include <xen/iocap.h>
+#include <xen/xmalloc.h>
+#include <xen/delay.h>
+#include <xen/spinlock.h>
+#include <asm/shared.h>
+#include <asm/traps.h>
+#include <asm/paging.h>
+#include <asm/vgt.h>
+#include <asm/hvm/support.h>
+#include <asm/x86_emulate.h>
+#include <public/platform.h>
+
+#define VGT_LOG(_f, _a...) gdprintk(XENLOG_INFO, "vGT: " _f "\n" , ## _a)
+#define VGT_ERR(_f, _a...) gdprintk(XENLOG_ERR, "vGT: " _f "\n" , ## _a)
+//#define VGT_DEBUG
+#ifdef VGT_DEBUG
+#define VGT_DBG(_f, _a...) gdprintk(XENLOG_DEBUG, "vGT: " _f "\n" , ## _a)
+#else
+#define VGT_DBG(_f, _a...) ;
+#endif
+
+static unsigned int igd_gen = XEN_IGD_INVALID;
+unsigned long vgt_mmio_bar_start;
+char *vgt_mmio_hva;
+int gt_fifo_count;
+
+static enum {
+ FORCE_WAKE_STATE_ON,
+ FORCE_WAKE_STATE_OFF
+} force_wake_state = FORCE_WAKE_STATE_OFF;
+
+static DEFINE_SPINLOCK(vgt_io_lock);
+
+int (*force_wake_need)(unsigned long reg);
+void (*force_wake_get)(void);
+void (*force_wake_put)(void);
+
+#define VGT_REG_READ(offset) *((const volatile u32 *)(vgt_mmio_hva + offset))
+#define VGT_REG_WRITE(offset, val) *((volatile u32 *)(vgt_mmio_hva + offset)) = (val)
+
+#define FORCEWAKE 0xA18C
+#define FORCEWAKE_ACK_HSW 0x130044
+#define FORCEWAKE_ACK 0x130090
+#define FORCEWAKE_MT 0xa188 /* multi-threaded */
+#define FORCEWAKE_MT_ACK 0x130040
+#define ECOBUS 0xa180
+#define FORCEWAKE_MT_ENABLE (1<<5)
+
+#define GTFIFODBG 0x120000
+#define GT_FIFO_CPU_ERROR_MASK 7
+#define GT_FIFO_OVFERR (1<<2)
+#define GT_FIFO_IAWRERR (1<<1)
+#define GT_FIFO_IARDERR (1<<0)
+
+#define GT_FIFO_FREE_ENTRIES 0x120008
+#define GT_FIFO_NUM_RESERVED_ENTRIES 20
+
+#define GEN6_GT_THREAD_STATUS_REG 0x13805c
+#define GEN6_GT_THREAD_STATUS_CORE_MASK 0x7
+#define GEN6_GT_THREAD_STATUS_CORE_MASK_HSW (0x7 | (0x07 << 16))
+
+#define _MASKED_BIT_ENABLE(a) (((a) << 16) | (a))
+#define _MASKED_BIT_DISABLE(a) ((a) << 16)
+
+#define wait_for_atomic_us(COND, US) ({ \
+ int i, ret__ = -1; \
+ for (i = 0; i < (US); i++) { \
+ if ((COND)) { \
+ ret__ = 0; \
+ break; \
+ } \
+ udelay(1); \
+ } \
+ ret__; \
+ })
+
+static int gen6_force_wake_need(unsigned long reg)
+{
+ reg -= vgt_mmio_bar_start;
+ return reg < 0x40000 && reg != FORCEWAKE;
+}
+
+static const u32 gen8_shadowed_regs[] = {
+ 0xa188,
+ 0xa008,
+ 0xa00c,
+ 0x2030,
+ 0x12030,
+ 0x1a030,
+ 0x22030,
+};
+
+static int is_gen8_shadowed(u32 reg)
+{
+ int i;
+ for (i = 0; i < ARRAY_SIZE(gen8_shadowed_regs); i++)
+ if (reg == gen8_shadowed_regs[i])
+ return 1;
+
+ return 0;
+}
+
+static int gen8_force_wake_need(unsigned long reg)
+{
+ reg -= vgt_mmio_bar_start;
+ return reg < 0x40000 && !is_gen8_shadowed(reg);
+}
+
+static void gen6_wait_for_thread_c0(void)
+{
+ u32 gt_thread_status_mask;
+
+ WARN_ON(igd_gen == XEN_IGD_INVALID);
+
+ if (igd_gen == XEN_IGD_HSW)
+ gt_thread_status_mask = GEN6_GT_THREAD_STATUS_CORE_MASK_HSW;
+ else
+ gt_thread_status_mask = GEN6_GT_THREAD_STATUS_CORE_MASK;
+
+ /* w/a for a sporadic read returning 0
+ * by waiting for the GT thread to wake up.
+ */
+ if (wait_for_atomic_us((VGT_REG_READ(GEN6_GT_THREAD_STATUS_REG) & gt_thread_status_mask) == 0, 500))
+ VGT_ERR("GT thread status wait timed out\n");
+}
+
+void gen6_gt_check_fifodbg(void)
+{
+ u32 gtfifodbg;
+ gtfifodbg = VGT_REG_READ(GTFIFODBG);
+ if (gtfifodbg & GT_FIFO_CPU_ERROR_MASK) {
+ VGT_ERR("MMIO read or write has been dropped %x\n", gtfifodbg);
+ VGT_REG_WRITE(GTFIFODBG, GT_FIFO_CPU_ERROR_MASK);
+ }
+}
+
+
+void gen6_force_wake_get (void)
+{
+ u32 forcewake_ack;
+
+ WARN_ON(igd_gen == XEN_IGD_INVALID);
+
+ if (igd_gen == XEN_IGD_HSW)
+ forcewake_ack = FORCEWAKE_ACK_HSW;
+ else
+ forcewake_ack = FORCEWAKE_ACK;
+
+ if (wait_for_atomic_us((VGT_REG_READ(forcewake_ack) & 1) == 0, 500))
+ VGT_ERR("Force wake wait timed out\n");
+
+ VGT_REG_WRITE(FORCEWAKE, 1);
+ VGT_REG_READ(ECOBUS);
+
+ if (wait_for_atomic_us((VGT_REG_READ(forcewake_ack) & 1), 500))
+ VGT_ERR("Force wake wait timed out\n");
+
+ gen6_wait_for_thread_c0();
+}
+
+void gen6_force_wake_put (void)
+{
+ VGT_REG_WRITE(FORCEWAKE, 0);
+ VGT_REG_READ(ECOBUS);
+ gen6_gt_check_fifodbg();
+}
+
+void gen7_force_wake_mt_get (void)
+{
+ u32 forcewake_ack;
+
+ WARN_ON(igd_gen == XEN_IGD_INVALID);
+
+ if (igd_gen == XEN_IGD_HSW || igd_gen == XEN_IGD_BDW)
+ forcewake_ack = FORCEWAKE_ACK_HSW;
+ else
+ forcewake_ack = FORCEWAKE_MT_ACK;
+
+ if (wait_for_atomic_us((VGT_REG_READ(forcewake_ack) & 1) == 0, 500))
+ VGT_ERR("Force wake wait timed out\n");
+
+ VGT_REG_WRITE(FORCEWAKE_MT, _MASKED_BIT_ENABLE(1));
+ VGT_REG_READ(ECOBUS);
+
+ if (wait_for_atomic_us((VGT_REG_READ(forcewake_ack) & 1), 500))
+ VGT_ERR("Force wake wait timed out\n");
+
+ if (igd_gen != XEN_IGD_BDW)
+ gen6_wait_for_thread_c0();
+}
+
+void gen7_force_wake_mt_put (void)
+{
+ VGT_REG_WRITE(FORCEWAKE_MT, _MASKED_BIT_DISABLE(1));
+ VGT_REG_READ(ECOBUS);
+ gen6_gt_check_fifodbg();
+}
+
+int gen6_gt_wait_for_fifo(void)
+{
+ int ret = 0;
+
+ if (gt_fifo_count < GT_FIFO_NUM_RESERVED_ENTRIES) {
+ int loop = 500;
+ u32 fifo = VGT_REG_READ(GT_FIFO_FREE_ENTRIES);
+ while (fifo <= GT_FIFO_NUM_RESERVED_ENTRIES && loop--) {
+ udelay(10);
+ fifo = VGT_REG_READ(GT_FIFO_FREE_ENTRIES);
+ }
+ if (loop < 0 && fifo <= GT_FIFO_NUM_RESERVED_ENTRIES)
+ ++ret;
+ gt_fifo_count = fifo;
+ }
+ gt_fifo_count--;
+
+ return ret;
+}
+
+int vgt_set_mmio_trap(struct domain *d, unsigned long mfn_start, unsigned long mfn_end)
+{
+ unsigned long hva;
+ int rc;
+
+ hva = (unsigned long) mfn_to_virt ( mfn_start );
+ rc = map_pages_to_xen ( hva, mfn_start, mfn_end - mfn_start + 1, PAGE_HYPERVISOR_NOCACHE );
+ VGT_LOG ( "Setup MMIO range [0x%lx - 0x%lx](hva 0x%lx) for trap. return %d\n",
+ mfn_start << PAGE_SHIFT, mfn_end << PAGE_SHIFT, hva, rc );
+
+ if (rc)
+ printk("vgt_set_mmio_trap failed %d\n", rc);
+ else {
+ vgt_mmio_bar_start = mfn_start << PAGE_SHIFT;
+ vgt_mmio_hva = (char *)hva;
+ VGT_LOG("vgt_mmio_bar_start: 0x%lx\n", vgt_mmio_bar_start);
+ VGT_LOG("vgt_mmio_hva: 0x%lx\n", (unsigned long)vgt_mmio_hva);
+ }
+ return rc;
+}
+
+unsigned int vgt_bdf = -1; /* -1 means an invalid value */
+
+int set_vgt_info(unsigned int gen_dev_bdf, unsigned int gen_dev_type)
+{
+ /* We only support GEN dev that's on bus 0 */
+ if ( PCI_BUS(gen_dev_bdf) != 0 )
+ return -EINVAL;
+
+ if ( gen_dev_type > XEN_IGD_MAX )
+ return -EINVAL;
+
+ vgt_bdf = gen_dev_bdf;
+ igd_gen = gen_dev_type;
+ VGT_LOG("vgt_bdf=0x%x, gen=%d\n", vgt_bdf, igd_gen);
+
+ WARN_ON(igd_gen == XEN_IGD_INVALID);
+
+ if (igd_gen == XEN_IGD_BDW) {
+ VGT_LOG("Use MT force wake\n");
+ force_wake_need = gen8_force_wake_need;
+ force_wake_get = gen7_force_wake_mt_get;
+ force_wake_put = gen7_force_wake_mt_put;
+ } else {
+ VGT_LOG("Set default force wake\n");
+ force_wake_need = gen6_force_wake_need;
+ force_wake_get = gen6_force_wake_get;
+ force_wake_put = gen6_force_wake_put;
+
+ if ( igd_gen == XEN_IGD_IVB || igd_gen == XEN_IGD_HSW ) {
+ int ecobus;
+
+ gen7_force_wake_mt_get();
+ ecobus = VGT_REG_READ(ECOBUS);
+ gen7_force_wake_mt_put();
+
+ if (ecobus & FORCEWAKE_MT_ENABLE)
+ {
+ VGT_LOG("Use MT force wake\n");
+ force_wake_get = gen7_force_wake_mt_get;
+ force_wake_put = gen7_force_wake_mt_put;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static inline void store_gdt(struct desc_ptr *dtr)
+{
+ asm volatile("sgdt %0":"=m" (*dtr));
+}
+
+static int hypercall_read_sysdata(struct vcpu *v, struct vcpu_sysdata_request *req)
+{
+ int bytes = req->ops.rd.bytes;
+
+#if 0
+ printk("hypercall_read_sysdata: src %lx bytes %x\n",
+ req->ops.rd.src_addr, req->ops.rd.bytes);
+#endif
+ if (bytes > 8)
+ bytes = 8;
+
+ req->ops.rd.sys_data = 0;
+ memcpy (&req->ops.rd.sys_data, (void*)req->ops.rd.src_addr, bytes);
+ return 1;
+}
+
+static int hypercall_get_segment(struct vcpu *v, struct vcpu_sysdata_request *req)
+{
+ struct desc_ptr gdtr;
+ struct desc_struct *desc_tbl, *pdesc;
+ uint16_t sel = req->ops.seg.selector;
+
+ req->ops.seg.xdt_desc[0] = 0;
+ req->ops.seg.xdt_desc[1] = 0;
+ if (sel & 4) {
+ printk("hypercall_get_selector %x in LDT, "
+ "not supported yet\n", sel);
+ return 0;
+ }
+ store_gdt(&gdtr);
+ desc_tbl = (struct desc_struct *) gdtr.base;
+ if ( sel >= gdtr.limit ) {
+ printk("hypercall_get_selector selector %x exceeds size"
+ "gdtr base %lx size %x\n", sel, gdtr.base, gdtr.limit);
+ return 0;
+ }
+ pdesc = desc_tbl + (sel>>3);
+ req->ops.seg.xdt_desc[0] = *(uint64_t *)pdesc;
+ if (!(pdesc->b & (1<<12))) /* S bit = 0: system descriptor */
+ {
+ pdesc++;
+ req->ops.seg.xdt_desc[1] = *(uint64_t *)pdesc;
+ }
+ return 1;
+}
+
+int hypercall_get_sysdata(struct vcpu *v, struct vcpu_sysdata_request *req)
+{
+ if (req->op_type == VCPUOP_sysdata_get_segment)
+ return hypercall_get_segment (v, req);
+ else if ( req->op_type == VCPUOP_sysdata_read )
+ {
+ return hypercall_read_sysdata (v, req);
+ }
+ else {
+ printk("Wrong hypercall_get_sysdata op_type %d\n", (int)req->op_type);
+ return 0;
+ }
+}
+
+/*
+ * The service to finish an I/O access on behalf of the requester, in
+ * case direct access to said I/O resources is trapped.
+ *
+ * Do we need a lock here? Perhaps not, since it's the guest driver's
+ * responsibility to avoid race contention on the same MMIO...
+ *
+ * FIXME: though PIO logic is included here, we only trap MMIO at the
+ * current stage. Need to add back the GP handler hook to support PIO
+ * trapping in the future, if required.
+ */
+int hypercall_io_emulation(struct vcpu *v, struct vcpu_emul_ioreq *req)
+{
+ unsigned long data;
+ int rc = 1;
+
+ VGT_DBG("I/O request: %s, %lx\n",
+ req->type == PV_IOREQ_TYPE_PIO ? "pio" : "mmio", req->addr);
+
+ if ( req->type == PV_IOREQ_TYPE_PIO )
+ {
+ ASSERT( req->size <= 4 );
+ if ( req->dir == PV_IOREQ_READ )
+ {
+ if ( (req->addr & ~3) == 0xcfc ) {
+ ASSERT (v->domain == dom0 );
+ ASSERT (CF8_to_BDF(v->domain->arch.pci_cf8) == vgt_bdf);
+ data = pci_conf_read (v->domain->arch.pci_cf8,
+ req->addr & 3, req->size);
+ memcpy (&req->data, &data, req->size);
+ return 1;
+ }
+ else if ( (req->addr & ~3) == 0xcf8 ) {
+ printk("VGT: hypercall_io_emulation: read %x, "
+ "unexpected to be here!!!\n", (unsigned int)req->addr);
+ }
+ switch (req->size)
+ {
+ case 1: req->data = inb ( req->addr ); break;
+ case 2: req->data = inw ( req->addr ); break;
+ case 4: req->data = inl ( req->addr ); break;
+ default: ASSERT (0); break;
+ }
+ } // PV_IOREQ_READ
+ else
+ {
+ if ( (req->addr & ~3) == 0xcfc ) {
+ ASSERT (v->domain == dom0 );
+ ASSERT (CF8_to_BDF(v->domain->arch.pci_cf8) == vgt_bdf);
+ pci_conf_write (v->domain->arch.pci_cf8,
+ (uint8_t) req->addr & 3,
+ (uint8_t)req->size, (uint32_t)req->data);
+ return 1;
+ }
+ else if ( (req->addr & ~3) == 0xcf8 ) {
+ printk("VGT: hypercall_io_emulation: write %x, "
+ "unexpected to be here!!!\n", (unsigned int)req->addr);
+ }
+ switch ( req->size )
+ {
+ case 1:
+ outb ( (uint8_t)req->data, req->addr );
+ break;
+ case 2:
+ outw ( (uint16_t)req->data, req->addr );
+ break;
+ case 4:
+ outl ( req->data, req->addr );
+ break;
+ default:
+ ASSERT ( 0 );
+ break;
+ }
+ } // PV_IOREQ_WRITE
+ } // PIO
+ else if (req->type == PV_IOREQ_TYPE_COPY) /* MMIO */
+ {
+ void *hva = maddr_to_virt ( req->addr );
+ int force_wake = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&vgt_io_lock, flags);
+
+ if ( req->dir == PV_IOREQ_READ )
+ {
+ if (force_wake_need(req->addr) && (force_wake_state == FORCE_WAKE_STATE_OFF)) {
+ force_wake = 1;
+ force_wake_get();
+ }
+
+ req->data = 0;
+ switch ( req->size )
+ {
+ case 1:
+ req->data = *(uint8_t *)hva;
+ break;
+ case 2:
+ req->data = *(uint16_t *)hva;
+ break;
+ case 4:
+ req->data = *(uint32_t *)hva;
+ break;
+ case 8:
+ req->data = *(uint64_t *)hva;
+ break;
+ default:
+ ASSERT ( 0 );
+ break;
+ }
+
+ if (force_wake) {
+ force_wake_put();
+ }
+ } // read
+ else
+ {
+ int fifo_ret = 0;
+ if (igd_gen == XEN_IGD_BDW) {
+ if (force_wake_need(req->addr)
+ && force_wake_state == FORCE_WAKE_STATE_OFF) {
+ force_wake = 1;
+ force_wake_get();
+ }
+ } else if (force_wake_need(req->addr)) {
+ fifo_ret = gen6_gt_wait_for_fifo();
+ }
+
+ switch ( req->size )
+ {
+ case 1:
+ *(uint8_t *)hva = req->data;
+ break;
+ case 2:
+ *(uint16_t *)hva = req->data;
+ break;
+ case 4: *(uint32_t *)hva = req->data;
+ break;
+ case 8: *(uint64_t *)hva = req->data;
+ break;
+ default:
+ ASSERT ( 0 );
+ break;
+ }
+ if (force_wake) {
+ force_wake_put();
+ } else if (fifo_ret) {
+ gen6_gt_check_fifodbg();
+ }
+ } // write
+
+ spin_unlock_irqrestore(&vgt_io_lock, flags);
+
+ } // mmio
+ else if (req->type == PV_IOREQ_TYPE_CTRL)
+ {
+ unsigned long flags;
+
+ spin_lock_irqsave(&vgt_io_lock, flags);
+
+ switch(req->addr)
+ {
+ case VGT_CTRL_FORCEWAKE_PUT:
+ if (force_wake_state == FORCE_WAKE_STATE_ON){
+ force_wake_put();
+ force_wake_state = FORCE_WAKE_STATE_OFF;
+ }
+ break;
+ case VGT_CTRL_FORCEWAKE_GET:
+ if (force_wake_state == FORCE_WAKE_STATE_OFF){
+ force_wake_get();
+ force_wake_state = FORCE_WAKE_STATE_ON;
+ }
+ break;
+ default:
+ rc = 0;
+ ASSERT(0);
+ break;
+ }
+
+ spin_unlock_irqrestore(&vgt_io_lock, flags);
+ }
+ else {
+ printk("%s: invalid type\n", __func__);
+ rc = 0;
+ }
+
+ return rc;
+}
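/*
 * Illustrative sketch (not part of the patch): how a dom0 vGT driver
 * might ask hypercall_io_emulation() above to perform a single GFX
 * register read on its behalf, using the vcpu_emul_ioreq layout from
 * the xen/include/public/vcpu.h hunk later in this patch. The
 * register address and HYPERVISOR_vcpu_op stub are assumptions.
 */
#if 0
static uint32_t read_gfx_reg_via_xen(uint64_t reg_pa)
{
    struct vcpu_emul_ioreq req = {
        .addr = reg_pa,             /* physical address of the register */
        .size = 4,
        .dir  = PV_IOREQ_READ,
        .type = PV_IOREQ_TYPE_COPY, /* MMIO */
    };

    /* Xen takes care of force wake / FIFO accounting internally. */
    HYPERVISOR_vcpu_op(VCPUOP_request_io_emulation, 0, &req);

    return (uint32_t)req.data;
}
#endif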
+
+/* Without this, after Dom0 S3, MMIO reads to the IGD may always return zero */
+void vgt_resume(void)
+{
+ if (force_wake_state == FORCE_WAKE_STATE_ON)
+ force_wake_get();
+}
+
+/*
+ * Use the hypervisor selector in the error code to indicate a
+ * vGT related event.
+ *
+ * P.S. will this cause any backward compatibility issue when
+ * __HYPERVISOR_CS is changed someday?
+ */
+void vgt_inject_virtual_exception(struct cpu_user_regs *regs, int pio)
+{
+ if (pio)
+ regs->error_code = __HYPERVISOR_CS + 4;
+ else
+ regs->error_code = __HYPERVISOR_CS;
+ do_guest_trap(TRAP_gp_fault, regs, 1);
+}
+
+int do_rsvd_page_fault(struct vcpu *v, unsigned long addr,
+ struct cpu_user_regs *regs)
+{
+ l1_pgentry_t pte;
+ int flags;
+
+ ASSERT ( v->domain == dom0 );
+ /* Only handle faults triggered by the deliberate reserved bit */
+ ASSERT ( (regs->error_code & (PFEC_page_present | PFEC_reserved_bit)) ==
+ (PFEC_page_present | PFEC_reserved_bit) );
+
+ /* Attempt to read the PTE that maps the VA being accessed. */
+ guest_get_eff_l1e ( v, addr, &pte );
+
+#if 1
+{
+ static long cnt = 0;
+ if (cnt++ < 1000)
+ VGT_DBG("vGT: captured %ldth rsvd fault (%lx, %lx)\n", cnt, addr, (l1e_get_pfn(pte) << PAGE_SHIFT) | (addr & ~PAGE_MASK));
+}
+#endif
+
+ /* We're only looking for page faults purely caused by Xen */
+ flags = l1e_get_flags(pte);
+ if ( (regs->error_code & PFEC_insn_fetch) ||
+ ((regs->error_code & PFEC_write_access) && !(flags & _PAGE_RW)) ||
+ ((regs->error_code & PFEC_user_mode) && !(flags & _PAGE_USER)) )
+ {
+ VGT_LOG("vGT: return guest fault instead on %lx (%x, %x)\n", addr, regs->error_code, flags);
+ regs->error_code &= ~PFEC_reserved_bit;
+ goto bail;
+ }
+
+ vgt_inject_virtual_exception(regs, 0);
+ return EXCRET_fault_fixed;
+
+ bail:
+ return 0;
+}
+
+/* FIXME: change to static */
+int vgt_hvm_intercept_io(ioreq_t *p)
+{
+ p->is_vgt = 1;
+
+ /* return X86EMUL_UNHANDLEABLE to forward to dom0 vGT driver */
+ return X86EMUL_UNHANDLEABLE;
+}
+
+static int vgt_intercept_cf8_cfc(
+ int dir, uint32_t port, uint32_t bytes, uint32_t *val)
+{
+ struct domain *d = current->domain;
+ ioreq_t *p = get_ioreq(current);
+
+ if ( (port & ~3) == 0xcf8 )
+ {
+ if (bytes != 4){
+ printk("VGT_WARNING: vgt_intercept_cf8_cfc bytes=%d not 4 bytes\n", bytes);
+ return X86EMUL_OKAY;
+ }
+ if ((port & 3) != 0){
+ printk("VGT_WARNING: vgt_intercept_cf8_cfc port=0x%x not aligned\n", bytes);
+ return X86EMUL_OKAY;
+ }
+ if (dir == IOREQ_WRITE) {
+ /* Write */
+ d->arch.pci_cf8 = *val;
+ if ( CF8_to_BDF(d->arch.pci_cf8) == vgt_bdf )
+ return vgt_hvm_intercept_io(p);
+ else
+ return X86EMUL_UNHANDLEABLE;
+ }
+ else {
+ /* Read */
+ *val = d->arch.pci_cf8;
+ return X86EMUL_OKAY;
+ }
+ }
+ else {
+ /* CFC access */
+ if ( CF8_to_BDF(d->arch.pci_cf8) == vgt_bdf )
+ {
+ /* To vGT device */
+ return vgt_hvm_intercept_io(p);
+ }
+ /* To traditional Device Model */
+ return X86EMUL_UNHANDLEABLE;
+ }
+}
+
+int vgt_hvm_init(struct domain *d)
+{
+ /* register vGT PIO & MMIO handler */
+
+ ASSERT( is_hvm_domain(d) );
+
+ if ( vgt_enabled(d) )
+ return 0;
+
+ printk("Enable vGT for domain %d\n", d->domain_id);
+
+ wp_htable(d) = xmalloc_bytes(WP_HASH_ENTRY_SIZE * WP_HASH_SIZE);
+ if ( wp_htable(d) == NULL )
+ return -ENOMEM;
+
+ d->arch.hvm_domain.vgt_enabled = 1;
+
+ register_portio_handler(d, 0xcf8, 8, vgt_intercept_cf8_cfc);
+
+ memset (wp_htable(d), 0, WP_HASH_ENTRY_SIZE * WP_HASH_SIZE);
+
+ /* FIXME: get the PIO & MMIO base&size */
+// register_portio_handler(d, vgt_pio_base, vgt_pio_size, vgt_intercept_io);
+
+ return 0;
+}
+
+
+void vgt_hvm_deinit(struct domain *d)
+{
+ if (!vgt_enabled(d))
+ return;
+
+ rangeset_destroy(d->iomem_forward_caps);
+ free_vgt_wp_hash( wp_htable(d) );
+ xfree ( wp_htable(d) );
+}
+
+int vgt_io_trap(struct domain *d, struct xen_domctl_vgt_io_trap *info)
+{
+ struct rangeset *rs;
+
+ void *range;
+
+ int r;
+ int i;
+
+ if (!d)
+ d = dom0;
+
+ /* Process PIO trap range. */
+ rs = d->arch.ioport_forwarding_caps;
+
+ if (info->n_pio) {
+ /* Add range. */
+ for (i = 0; i < info->n_pio; i++) {
+ r = rangeset_add_range(rs, info->pio[i].s, info->pio[i].e);
+ if (r) {
+ printk("VGT(%d): fail to add PIO range [0x%"PRIx64" - 0x%"PRIx64"].\n",
+ d->domain_id, info->pio[i].s, info->pio[i].e);
+ goto err;
+ }
+ }
+
+ printk("DOM %d IO rangeset:\n", d->domain_id);
+
+ rangeset_printk(rs);
+
+ printk("\n");
+ } else {
+ /* Query rangeset. */
+ range = first_range_ex(rs);
+
+ for (i = 0; range && i < MAX_VGT_IO_TRAP_INFO; i++)
+ range = get_range(rs, range, &info->pio[i].s, &info->pio[i].e);
+
+ info->n_pio = i;
+ }
+
+ /* Process MMIO trap range. */
+ rs = d->iomem_forward_caps;
+
+ if (info->n_mmio) {
+ /* Add MMIO range. */
+
+ /* XXX: remove the old trap info. This is necessary since vGT
+ * doesn't emulate the 4M MMIO BAR of GEN dev correctly now:
+ * vGT should be improved to use untrap-then-trap logic when
+ * the BAR is updated.
+ */
+ rangeset_destroy(d->iomem_forward_caps);
+ d->iomem_forward_caps = rangeset_new(d, "I/O Memory Forwarding",
+ RANGESETF_prettyprint_hex);
+
+ if (!d->iomem_forward_caps) {
+ printk("VGT(%d): fail to allocate rangeset for io trap.\n",
+ d->domain_id);
+ return -ENOMEM;
+ }
+
+ rs = d->iomem_forward_caps;
+
+ for (i = 0; i < info->n_mmio; i++) {
+ printk("VGT(%d): Add MMIO range [0x%"PRIx64" - 0x%"PRIx64"].\n",
+ d->domain_id, info->mmio[i].s, info->mmio[i].e);
+
+ if (d == dom0) {
+ if (info->mmio[i].s & ~PAGE_MASK
+ || info->mmio[i].e & ~PAGE_MASK) {
+ printk("VGT(%d): MMIO range is not page-aligned.\n",
+ d->domain_id);
+
+ r = -EINVAL;
+ goto err;
+ }
+
+ /* Dom0 uses MFNs. */
+ info->mmio[i].s >>= PAGE_SHIFT;
+ info->mmio[i].e >>= PAGE_SHIFT;
+ }
+
+ r = rangeset_add_range(rs, info->mmio[i].s, info->mmio[i].e);
+ if (r) {
+ printk("VGT(%d): fail to add MMIO range",
+ d->domain_id);
+
+ goto err;
+ }
+
+ if (d == dom0) {
+ /*
+ * Map the MMIO range into Xen,
+ * because we will access some GFX registers in Xen.
+ */
+ r = vgt_set_mmio_trap(d, info->mmio[i].s, info->mmio[i].e);
+ if (r) {
+ printk("VGT(%d): fail to map MMIO range.\n",
+ d->domain_id);
+
+ goto err;
+ }
+ }
+ }
+
+ printk("DOM %d MMIO rangeset:\n", d->domain_id);
+
+ rangeset_printk(rs);
+
+ printk("\n");
+
+ } else {
+ /* Query MMIO range. */
+ range = first_range_ex(rs);
+
+ for (i = 0; range && i < MAX_VGT_IO_TRAP_INFO; i++) {
+ range = get_range(rs, range, &info->mmio[i].s, &info->mmio[i].e);
+
+ if (d == dom0) {
+ info->mmio[i].s <<= PAGE_SHIFT;
+ info->mmio[i].e <<= PAGE_SHIFT;
+ }
+ }
+
+ info->n_mmio = i;
+ }
+
+ return 0;
+
+err:
+ info->n_pio = info->n_mmio = 0;
+ return r;
+}
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index e390c2a..932a104 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -4196,7 +4196,9 @@ x86_emulate(
if ( !rc && (b != 0x6f) && (ea.type == OP_MEM) )
rc = ops->write(ea.mem.seg, ea.mem.off, mmvalp,
ea.bytes, ctxt);
- goto done;
+ if (rc)
+ goto done;
+ break;
}
case 0x80 ... 0x8f: /* jcc (near) */ {
diff --git a/xen/common/domain.c b/xen/common/domain.c
index fac3470..317386f 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -248,8 +248,11 @@ struct domain *domain_create(
init_status |= INIT_rangeset;
d->iomem_caps = rangeset_new(d, "I/O Memory", RANGESETF_prettyprint_hex);
+ d->iomem_forward_caps = rangeset_new(d, "I/O Memory Forwarding",
+ RANGESETF_prettyprint_hex);
d->irq_caps = rangeset_new(d, "Interrupts", 0);
- if ( (d->iomem_caps == NULL) || (d->irq_caps == NULL) )
+ if ( (d->iomem_caps == NULL) || (d->iomem_forward_caps == NULL)
+ || (d->irq_caps == NULL) )
goto fail;
if ( domcr_flags & DOMCRF_dummy )
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index 9bd8f80..e056c1b 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -298,6 +298,11 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
case XEN_DOMCTL_test_assign_device:
d = NULL;
break;
+ case XEN_DOMCTL_vgt_io_trap:
+ if (!op->domain) {
+ d = NULL;
+ break;
+ }
default:
d = rcu_lock_domain_by_id(op->domain);
if ( d == NULL )
diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
index 64c976b..fdfafd5 100644
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -1140,20 +1140,14 @@ void free_xen_event_channel(
(void)__evtchn_close(d, port);
}
-
-void notify_via_xen_event_channel(struct domain *ld, int lport)
+void __notify_via_xen_event_channel(struct domain *ld, int lport)
{
struct evtchn *lchn, *rchn;
struct domain *rd;
int rport;
- spin_lock(&ld->event_lock);
-
if ( unlikely(ld->is_dying) )
- {
- spin_unlock(&ld->event_lock);
return;
- }
ASSERT(port_is_valid(ld, lport));
lchn = evtchn_from_port(ld, lport);
@@ -1166,6 +1160,14 @@ void notify_via_xen_event_channel(struct domain *ld, int lport)
rchn = evtchn_from_port(rd, rport);
evtchn_set_pending(rd->vcpu[rchn->notify_vcpu_id], rport);
}
+}
+
+
+void notify_via_xen_event_channel(struct domain *ld, int lport)
+{
+ spin_lock(&ld->event_lock);
+
+ __notify_via_xen_event_channel(ld, lport);
spin_unlock(&ld->event_lock);
}
diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c
index 5072133..26d8dae 100644
--- a/xen/common/keyhandler.c
+++ b/xen/common/keyhandler.c
@@ -20,6 +20,7 @@
#include <xen/init.h>
#include <asm/debugger.h>
#include <asm/div64.h>
+#include <asm/hvm/support.h>
static struct keyhandler *key_table[256];
static unsigned char keypress_key;
diff --git a/xen/common/rangeset.c b/xen/common/rangeset.c
index f09c0c4..050a775 100644
--- a/xen/common/rangeset.c
+++ b/xen/common/rangeset.c
@@ -438,3 +438,18 @@ void rangeset_domain_printk(
spin_unlock(&d->rangesets_lock);
}
+
+void *first_range_ex(struct rangeset *r)
+{
+ return first_range(r);
+}
+
+void *get_range(struct rangeset *r, void *x,
+ unsigned long *s, unsigned long *e)
+{
+ struct range *y = x;
+
+ *s = y->s;
+ *e = y->e;
+ return next_range(r, y);
+}
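/*
 * Illustrative sketch (not part of the patch): iterating a rangeset
 * with the first_range_ex()/get_range() helpers added above -- the
 * same pattern vgt_io_trap() uses to answer query requests.
 */
#if 0
static void dump_ranges(struct rangeset *r)
{
    unsigned long s, e;
    void *it;

    for ( it = first_range_ex(r); it != NULL; )
    {
        it = get_range(r, it, &s, &e);  /* fills s/e, returns the next */
        printk("range [%lx - %lx]\n", s, e);
    }
}
#endif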
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 909f449..dfe15b4 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -214,7 +214,14 @@ struct paging_vcpu {
#define MAX_CPUID_INPUT 40
typedef xen_domctl_cpuid_t cpuid_input_t;
-#define MAX_NESTEDP2M 10
+/*
+ * Adding new data structures to struct domain exceeds the PAGE_SIZE
+ * limitation, and extending it to multiple pages may be risky.
+ * We simply borrow space from some unused data structures for now,
+ * and will revisit this later.
+ */
+//#define MAX_NESTEDP2M 10
+#define MAX_NESTEDP2M 1
struct p2m_domain;
struct time_scale {
int shift;
@@ -251,6 +258,7 @@ struct arch_domain
/* I/O-port admin-specified access capabilities. */
struct rangeset *ioport_caps;
+ struct rangeset *ioport_forwarding_caps;
uint32_t pci_cf8;
uint8_t cmos_idx;
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
index 27b3de5..74375a8 100644
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -41,6 +41,24 @@ struct hvm_ioreq_page {
void *va;
};
+struct wp_hash_table {
+ struct wp_hash_table *next;
+ unsigned long gpfn;
+};
+#define WP_HASH_SIZE_SHIFT 8
+#define WP_HASH_SIZE (1 << WP_HASH_SIZE_SHIFT)
+#define wp_hash(x) ((x) % WP_HASH_SIZE)
+#define WP_INVALID_GPFN 0
+#define WP_HASH_ENTRY_SIZE sizeof(struct wp_hash_table)
+#define wp_htable(d) (d->arch.hvm_domain.vgt_wp_hash_base)
+#define wp_htable_lock(d) (d->arch.hvm_domain.vgt_wp_hash_lock)
+int vgt_wp_hash_add(struct wp_hash_table *wp_ht, unsigned long gpfn);
+int vgt_wp_hash_rem(struct wp_hash_table *wp_ht, unsigned long gpfn);
+struct wp_hash_table *lookup_wp_hash_table(
+ struct wp_hash_table *wp_ht,
+ unsigned long gpfn);
+void free_vgt_wp_hash(struct wp_hash_table *wp_ht);
+
struct hvm_domain {
struct hvm_ioreq_page ioreq;
struct hvm_ioreq_page buf_ioreq;
@@ -100,9 +118,13 @@ struct hvm_domain {
struct vmx_domain vmx;
struct svm_domain svm;
};
+ bool_t vgt_enabled;
+ struct wp_hash_table *vgt_wp_hash_base;
+ spinlock_t vgt_wp_hash_lock;
};
#define hap_enabled(d) ((d)->arch.hvm_domain.hap_enabled)
+#define vgt_enabled(d) ((d)->arch.hvm_domain.vgt_enabled)
#endif /* __ASM_X86_HVM_DOMAIN_H__ */
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 4d303e6..151d24a 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -194,6 +194,11 @@ struct hvm_function_table {
bool_t access_w, bool_t access_x);
};
+int vgt_domctl_add_range(struct domain *d, unsigned long gfn,
+ unsigned long mfn, unsigned long nr_mfns);
+int vgt_domctl_remove_range(struct domain *d, unsigned long gfn,
+ unsigned long mfn, unsigned long nr_mfns);
+
extern struct hvm_function_table hvm_funcs;
extern bool_t hvm_enabled;
extern bool_t cpu_has_lmsl;
diff --git a/xen/include/asm-x86/hvm/io.h b/xen/include/asm-x86/hvm/io.h
index 410a5f6..a237c71 100644
--- a/xen/include/asm-x86/hvm/io.h
+++ b/xen/include/asm-x86/hvm/io.h
@@ -70,8 +70,10 @@ extern const struct hvm_mmio_handler vlapic_mmio_handler;
extern const struct hvm_mmio_handler vioapic_mmio_handler;
extern const struct hvm_mmio_handler msixtbl_mmio_handler;
extern const struct hvm_mmio_handler iommu_mmio_handler;
+extern const struct hvm_mmio_handler vgt_mmio_handler;
+extern const struct hvm_mmio_handler writeprotection_handler;
-#define HVM_MMIO_HANDLER_NR 5
+#define HVM_MMIO_HANDLER_NR 7
int hvm_io_intercept(ioreq_t *p, int type);
void register_io_handler(
diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h
index e8b8cd7..871998a 100644
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -44,12 +44,23 @@ struct hvm_vcpu_asid {
uint32_t asid;
};
+/* Max MMIO read or write size in one instruction emulation */
+#define MAX_INS_EMULATE_MMIO_SIZE 32
+
struct hvm_vcpu_io {
/* I/O request in flight to device model. */
enum hvm_io_state io_state;
unsigned long io_data;
int io_size;
+ int mmio_split;
+ int mmio_split_size;
+ int mmio_split_dir;
+
+ paddr_t mmio_split_pa;
+ unsigned int mmio_split_done_size;
+ uint8_t mmio_split_buf[MAX_INS_EMULATE_MMIO_SIZE];
+
/*
* HVM emulation:
* Virtual address @mmio_gva maps to MMIO physical frame @mmio_gpfn.
@@ -170,6 +181,7 @@ struct hvm_vcpu {
struct hvm_trap inject_trap;
struct viridian_vcpu viridian;
+ int vgt_port; /* event channel port to notify the dom0 vGT driver */
};
#endif /* __ASM_X86_HVM_VCPU_H__ */
diff --git a/xen/include/asm-x86/iocap.h b/xen/include/asm-x86/iocap.h
index 591ae17..dfb7958 100644
--- a/xen/include/asm-x86/iocap.h
+++ b/xen/include/asm-x86/iocap.h
@@ -14,6 +14,14 @@
#define ioports_access_permitted(d, s, e) \
rangeset_contains_range((d)->arch.ioport_caps, s, e)
+#define ioports_permit_forwarding(d, s, e) \
+ rangeset_add_range((d)->arch.ioport_forwarding_caps, s, e)
+#define ioports_forwarding_permitted(d, s, e) \
+ rangeset_contains_range((d)->arch.ioport_forwarding_caps, s, e)
+#define ioports_forwarding_started(d) \
+ (!rangeset_is_empty((d)->arch.ioport_forwarding_caps))
+
+
#define cache_flush_permitted(d) \
(!rangeset_is_empty((d)->iomem_caps) || \
!rangeset_is_empty((d)->arch.ioport_caps))
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index 43583b2..2656de0 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -70,6 +70,7 @@ typedef enum {
p2m_ram_paging_in = 11, /* Memory that is being paged in */
p2m_ram_shared = 12, /* Shared or sharable memory */
p2m_ram_broken = 13, /* Broken page, access cause domain crash */
+ p2m_writeprotection = 14, /* write-protection guest PPGTT PT pages */
} p2m_type_t;
/*
@@ -656,6 +657,8 @@ void p2m_flush_nestedp2m(struct domain *d);
void nestedp2m_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
l1_pgentry_t *p, mfn_t table_mfn, l1_pgentry_t new, unsigned int level);
+int hap_write_protect_pages(struct domain *d,
+ unsigned long *gpfn, int num, int wr);
#endif /* _XEN_P2M_H */
/*
diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h
index 2c3cc2d..e2d3e86 100644
--- a/xen/include/asm-x86/page.h
+++ b/xen/include/asm-x86/page.h
@@ -71,6 +71,10 @@
#define l4e_get_pfn(x) \
((unsigned long)(((x).l4 & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT))
+/* Get reserved bits mapped by pte(unsigned long) */
+#define l1e_get_rsvd(x) \
+ ((unsigned long)(((x).l1 & (PRSVD_MASK&(~PADDR_MASK)))))
+
/* Get physical address of page mapped by pte (paddr_t). */
#define l1e_get_paddr(x) \
((paddr_t)(((x).l1 & (PADDR_MASK&PAGE_MASK))))
@@ -157,6 +161,12 @@ static inline l4_pgentry_t l4e_from_paddr(paddr_t pa, unsigned int flags)
#define l3e_remove_flags(x, flags) ((x).l3 &= ~put_pte_flags(flags))
#define l4e_remove_flags(x, flags) ((x).l4 &= ~put_pte_flags(flags))
+/* Add extra reserved bits to an existing pte */
+#define l1e_add_rsvd(x, bits) ((x).l1 |= (bits))
+
+/* Remove reserved bits from an existing pte */
+#define l1e_remove_rsvd(x, bits) ((x).l1 &= ~(bits))
+
/* Check if a pte's page mapping or significant access flags have changed. */
#define l1e_has_changed(x,y,flags) \
( !!(((x).l1 ^ (y).l1) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) )
@@ -167,6 +177,10 @@ static inline l4_pgentry_t l4e_from_paddr(paddr_t pa, unsigned int flags)
#define l4e_has_changed(x,y,flags) \
( !!(((x).l4 ^ (y).l4) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) )
+/* Check if a pte's reserved bits have changed */
+#define l1e_has_changed_rsvd(x,y,bits) \
+ ( !!(((x).l1 ^ (y).l1) & ((PRSVD_MASK&(~PADDR_MASK))|(bits))) )
+
/* Pagetable walking. */
#define l2e_to_l1e(x) ((l1_pgentry_t *)__va(l2e_get_paddr(x)))
#define l3e_to_l2e(x) ((l2_pgentry_t *)__va(l3e_get_paddr(x)))
diff --git a/xen/include/asm-x86/traps.h b/xen/include/asm-x86/traps.h
index 82cbcee..5420920 100644
--- a/xen/include/asm-x86/traps.h
+++ b/xen/include/asm-x86/traps.h
@@ -32,6 +32,8 @@ extern void machine_check_vector(struct cpu_user_regs *regs, long error_code);
void async_exception_cleanup(struct vcpu *);
+extern void do_guest_trap(
+ int trapnr, const struct cpu_user_regs *regs, int use_error_code);
/**
* guest_has_trap_callback
*
diff --git a/xen/include/asm-x86/vgt.h b/xen/include/asm-x86/vgt.h
new file mode 100644
index 0000000..2e0afa8
--- /dev/null
+++ b/xen/include/asm-x86/vgt.h
@@ -0,0 +1,43 @@
+/*
+ * vgt.h: vGT related definitions
+ * Copyright (c) 2011, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+#ifndef __ASM_X86_VGT_H__
+#define __ASM_X86_VGT_H__
+
+extern int vgt_io_trap(struct domain *d, struct xen_domctl_vgt_io_trap *info);
+extern int set_vgt_info(unsigned int gen_dev_bdf, unsigned int gen_dev_type);
+extern int vgt_hvm_init(struct domain *d);
+extern void vgt_hvm_deinit(struct domain *d);
+extern int hypercall_io_emulation(struct vcpu *v, struct vcpu_emul_ioreq *req);
+extern int hypercall_get_sysdata(struct vcpu *v,
+ struct vcpu_sysdata_request *req);
+extern int do_rsvd_page_fault(struct vcpu *v, unsigned long addr,
+ struct cpu_user_regs *regs);
+extern void vgt_inject_virtual_exception(struct cpu_user_regs *regs, int pio);
+extern int vgt_set_mmio_trap(struct domain *d, unsigned long mfn_start, unsigned long mfn_end);
+
+int vgt_hvm_intercept_io(ioreq_t *p);
+
+#define CF8_to_BDF(cf8) (((unsigned int)(cf8) >> 8) & 0xffff)
+extern unsigned int vgt_bdf;
+
+void vgt_resume(void);
+
+#endif // __ASM_X86_VGT_H__
+
+
diff --git a/xen/include/asm-x86/x86_64/page.h b/xen/include/asm-x86/x86_64/page.h
index c193c88..5ee47fd 100644
--- a/xen/include/asm-x86/x86_64/page.h
+++ b/xen/include/asm-x86/x86_64/page.h
@@ -22,10 +22,24 @@
#define __PAGE_OFFSET DIRECTMAP_VIRT_START
#define __XEN_VIRT_START XEN_VIRT_START
-/* These are architectural limits. Current CPUs support only 40-bit phys. */
-#define PADDR_BITS 52
+/*
+ * These are architectural limits. Current CPUs support only 40-bit phys.
+ *
+ * There's a requirement to trap-and-emulate MMIO from PV guests (such as
+ * in vGT), so we use a reserved PTE bit to trap the MMIO ranges of
+ * interest. That means we must reserve some of the highest bits under the
+ * architectural limit for that purpose. Currently only one reserved bit is
+ * stolen here, and it should work well on client platforms for quite a
+ * long time. There should be enough time to come up with another
+ * architectural approach before all bits are supported by CPUs, if this
+ * usage continues to be appealing. :-)
+ */
+#define PADDR_ARCH_BITS 52
+#define PRSVD_BITS PADDR_ARCH_BITS
+#define PADDR_BITS 51
#define VADDR_BITS 48
#define PADDR_MASK ((1UL << PADDR_BITS)-1)
+#define PRSVD_MASK ((1UL << PRSVD_BITS)-1)
#define VADDR_MASK ((1UL << VADDR_BITS)-1)
#define is_canonical_address(x) (((long)(x) >> 47) == ((long)(x) >> 63))
@@ -161,6 +175,9 @@ typedef l4_pgentry_t root_pgentry_t;
/* Bit 22 of a 24-bit flag mask. This corresponds to bit 62 of a pte.*/
#define _PAGE_GNTTAB (1U<<22)
+/* Bit 51 of the pte, as the indicator for MMIO trap-and-emulation */
+#define _PAGE_FORWARD (1UL<<51)
+
#define PAGE_HYPERVISOR (__PAGE_HYPERVISOR | _PAGE_GLOBAL)
#define PAGE_HYPERVISOR_NOCACHE (__PAGE_HYPERVISOR_NOCACHE | _PAGE_GLOBAL)
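/*
 * Illustrative check (not part of the patch): with PADDR_BITS lowered
 * to 51 and PRSVD_BITS kept at 52, the reserved-bit mask used by
 * l1e_get_rsvd() selects exactly bit 51, i.e. _PAGE_FORWARD. This
 * could be asserted, e.g. in an init function:
 */
#if 0
/*
 * PRSVD_MASK == (1UL << 52) - 1, PADDR_MASK == (1UL << 51) - 1,
 * so PRSVD_MASK & ~PADDR_MASK == 1UL << 51 == _PAGE_FORWARD.
 */
BUILD_BUG_ON((PRSVD_MASK & ~PADDR_MASK) != _PAGE_FORWARD);
#endif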
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 4c5b2bb..869f381 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -537,6 +537,24 @@ struct xen_domctl_ioport_mapping {
typedef struct xen_domctl_ioport_mapping xen_domctl_ioport_mapping_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_mapping_t);
+#define MAX_VGT_IO_TRAP_INFO 4
+
+struct vgt_io_trap_info {
+ uint64_t s;
+ uint64_t e;
+};
+
+struct xen_domctl_vgt_io_trap {
+ uint32_t n_pio;
+ struct vgt_io_trap_info pio[MAX_VGT_IO_TRAP_INFO];
+
+ uint32_t n_mmio;
+ struct vgt_io_trap_info mmio[MAX_VGT_IO_TRAP_INFO];
+};
+
+typedef struct xen_domctl_vgt_io_trap xen_domctl_vgt_io_trap_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vgt_io_trap_t);
+
/*
* Pin caching type of RAM space for x86 HVM domU.
@@ -924,6 +942,9 @@ struct xen_domctl {
#define XEN_DOMCTL_gdbsx_pausevcpu 1001
#define XEN_DOMCTL_gdbsx_unpausevcpu 1002
#define XEN_DOMCTL_gdbsx_domstatus 1003
+
+#define XEN_DOMCTL_vgt_io_trap 700
+
uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */
domid_t domain;
union {
@@ -979,7 +1000,8 @@ struct xen_domctl {
struct xen_domctl_set_broken_page_p2m set_broken_page_p2m;
struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
struct xen_domctl_gdbsx_domstatus gdbsx_domstatus;
- uint8_t pad[128];
+ struct xen_domctl_vgt_io_trap vgt_io_trap;
+ uint8_t pad[256];
} u;
};
typedef struct xen_domctl xen_domctl_t;
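/*
 * Illustrative sketch (not part of the patch): querying the current
 * vGT MMIO trap ranges with the XEN_DOMCTL_vgt_io_trap op defined
 * above. Per vgt_io_trap() earlier in this patch, zero n_pio/n_mmio
 * means "query"; non-zero counts add ranges. The do_domctl_call stub
 * stands in for the toolstack's domctl wrapper and is an assumption.
 */
#if 0
static void query_vgt_traps(domid_t domid)
{
    uint32_t i;
    struct xen_domctl domctl = {
        .cmd = XEN_DOMCTL_vgt_io_trap,
        .interface_version = XEN_DOMCTL_INTERFACE_VERSION,
        .domain = domid,
        /* u.vgt_io_trap zeroed: n_pio == n_mmio == 0 requests a query */
    };

    if ( do_domctl_call(&domctl) == 0 )
        for ( i = 0; i < domctl.u.vgt_io_trap.n_mmio; i++ )
            printf("mmio [%llx - %llx]\n",
                   (unsigned long long)domctl.u.vgt_io_trap.mmio[i].s,
                   (unsigned long long)domctl.u.vgt_io_trap.mmio[i].e);
}
#endif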
diff --git a/xen/include/public/hvm/e820.h b/xen/include/public/hvm/e820.h
index 5bdc227..683f091 100644
--- a/xen/include/public/hvm/e820.h
+++ b/xen/include/public/hvm/e820.h
@@ -27,7 +27,7 @@
#define HVM_E820_NR_OFFSET 0x000001E8
#define HVM_E820_OFFSET 0x000002D0
-#define HVM_BELOW_4G_RAM_END 0xF0000000
+#define HVM_BELOW_4G_RAM_END 0xC0000000
#define HVM_BELOW_4G_MMIO_START HVM_BELOW_4G_RAM_END
#define HVM_BELOW_4G_MMIO_LENGTH ((1ULL << 32) - HVM_BELOW_4G_MMIO_START)
diff --git a/xen/include/public/hvm/hvm_op.h b/xen/include/public/hvm/hvm_op.h
index a9aab4b..4bfdd85 100644
--- a/xen/include/public/hvm/hvm_op.h
+++ b/xen/include/public/hvm/hvm_op.h
@@ -270,6 +270,36 @@ struct xen_hvm_inject_msi {
typedef struct xen_hvm_inject_msi xen_hvm_inject_msi_t;
DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_msi_t);
+#define HVMOP_vgt_map_mmio 18
+struct xen_hvm_vgt_map_mmio {
+ uint16_t domid;
+ uint16_t map; /* 1: Map, 0: Unmap */
+ uint32_t nr_mfns;
+ uint64_t first_gfn;
+ uint64_t first_mfn;
+};
+typedef struct xen_hvm_vgt_map_mmio xen_hvm_vgt_map_mmio_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_vgt_map_mmio_t);
+
+#define HVMOP_vgt_enable 19
+struct xen_hvm_vgt_enable {
+ uint16_t domid;
+};
+typedef struct xen_hvm_vgt_enable xen_hvm_vgt_enable_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_vgt_enable_t);
+
+#define HVMOP_vgt_wp_pages 20 /* write-protect guest pages */
+struct xen_hvm_vgt_wp_pages {
+#define MAX_WP_BATCH_PAGES 128
+ domid_t domid;
+ uint16_t set; /* 1: set WP, 0: remove WP */
+ uint16_t nr_pages;
+ unsigned long wp_pages[MAX_WP_BATCH_PAGES];
+};
+typedef struct xen_hvm_vgt_wp_pages xen_hvm_vgt_wp_pages_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_vgt_wp_pages_t);
+
+
#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
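/*
 * Illustrative sketch (not part of the patch): batching up to
 * MAX_WP_BATCH_PAGES gpfns into the HVMOP_vgt_wp_pages structure
 * defined above. The HYPERVISOR_hvm_op stub is the usual guest-side
 * wrapper and is assumed here.
 */
#if 0
static int wp_pages(domid_t domid, const unsigned long *gpfns,
                    uint16_t nr, int set)
{
    struct xen_hvm_vgt_wp_pages wp = {
        .domid    = domid,
        .set      = set ? 1 : 0,  /* 1: add WP, 0: remove WP */
        .nr_pages = nr,
    };

    if ( nr > MAX_WP_BATCH_PAGES )
        return -1;

    memcpy(wp.wp_pages, gpfns, nr * sizeof(wp.wp_pages[0]));

    return HYPERVISOR_hvm_op(HVMOP_vgt_wp_pages, &wp);
}
#endif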
diff --git a/xen/include/public/hvm/ioreq.h b/xen/include/public/hvm/ioreq.h
index f05d130..c1a215a 100644
--- a/xen/include/public/hvm/ioreq.h
+++ b/xen/include/public/hvm/ioreq.h
@@ -54,8 +54,9 @@ struct ioreq {
* of the real data to use. */
uint8_t dir:1; /* 1=read, 0=write */
uint8_t df:1;
- uint8_t _pad1:1;
+ uint8_t is_vgt:1; /* if 1, it is vGT I/O request */
uint8_t type; /* I/O type */
+ uint32_t vgt_eport; /* evtchn for notification to/from vGT driver */
};
typedef struct ioreq ioreq_t;
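The repurposed _pad1 bit and the new vgt_eport field let an ioreq consumer route vGT requests to the vGT driver's event channel instead of the default device model. Illustrative only; notify_vgt() and handle_default_ioreq() are hypothetical helpers, not part of the patch.

static void dispatch_ioreq(ioreq_t *req)
{
    if ( req->is_vgt )
        notify_vgt(req->vgt_eport, req);  /* vGT driver handles it */
    else
        handle_default_ioreq(req);        /* normal device-model path */
}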
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index 7a26dee..a3d3611 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -461,6 +461,29 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t);
#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
+/*
+ * Translate the given guest PFNs to MFNs
+ */
+#define XENMEM_get_mfn_from_pfn 25
+struct xen_get_mfn_from_pfn {
+ /*
+ * Pointer to a buffer holding the list of PFNs.
+ * IN: the guest PFNs to be translated.
+ * OUT: the translated MFNs, or INVALID_MFN where no valid translation exists.
+ */
+ XEN_GUEST_HANDLE(xen_pfn_t) pfn_list;
+
+ /*
+ * IN: number of PFNs in pfn_list.
+ */
+ unsigned int nr_pfns;
+
+ /* IN: which domain */
+ domid_t domid;
+};
+typedef struct xen_get_mfn_from_pfn xen_get_mfn_from_pfn_t;
+DEFINE_XEN_GUEST_HANDLE(xen_get_mfn_from_pfn_t);
+
#endif /* __XEN_PUBLIC_MEMORY_H__ */
/*
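A minimal sketch of driving the new memory op from a guest, assuming Linux-style hypercall plumbing (HYPERVISOR_memory_op, set_xen_guest_handle); error handling is elided.

static int vgt_pfns_to_mfns(domid_t domid, xen_pfn_t *pfns, unsigned int nr)
{
    struct xen_get_mfn_from_pfn op = {
        .nr_pfns = nr,
        .domid   = domid,
    };

    /* IN: pfns[] holds guest PFNs; OUT: Xen rewrites each entry with
     * the MFN, or INVALID_MFN where no valid translation exists. */
    set_xen_guest_handle(op.pfn_list, pfns);

    return HYPERVISOR_memory_op(XENMEM_get_mfn_from_pfn, &op);
}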
diff --git a/xen/include/public/platform.h b/xen/include/public/platform.h
index 4341f54..295ee43 100644
--- a/xen/include/public/platform.h
+++ b/xen/include/public/platform.h
@@ -527,6 +527,21 @@ struct xenpf_core_parking {
typedef struct xenpf_core_parking xenpf_core_parking_t;
DEFINE_XEN_GUEST_HANDLE(xenpf_core_parking_t);
+#define XENPF_set_vgt_info 61
+
+#define XEN_IGD_INVALID 0
+#define XEN_IGD_SNB 1
+#define XEN_IGD_IVB 2
+#define XEN_IGD_HSW 3
+#define XEN_IGD_BDW 4
+#define XEN_IGD_MAX 4 /* the highest GEN device type supported */
+struct xenpf_vgt_info {
+ unsigned int gen_dev_bdf;
+ unsigned int gen_dev_type;
+};
+typedef struct xenpf_vgt_info xenpf_vgt_info_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_vgt_info_t);
+
/*
* ` enum neg_errnoval
* ` HYPERVISOR_platform_op(const struct xen_platform_op*);
@@ -553,6 +568,7 @@ struct xen_platform_op {
struct xenpf_cpu_hotadd cpu_add;
struct xenpf_mem_hotadd mem_add;
struct xenpf_core_parking core_parking;
+ struct xenpf_vgt_info vgt_info;
uint8_t pad[128];
} u;
};
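A minimal sketch of the dom0 side reporting the IGD to Xen at start of day, assuming a Linux-style HYPERVISOR_platform_op wrapper; the BDF (the usual 00:02.0 IGD slot) and the Haswell device type are illustrative.

static int vgt_report_igd(void)
{
    struct xen_platform_op op = {
        .cmd = XENPF_set_vgt_info,
        .interface_version = XENPF_INTERFACE_VERSION,
    };

    op.u.vgt_info.gen_dev_bdf  = (0 << 8) | (2 << 3) | 0; /* bus 0, dev 2, fn 0 */
    op.u.vgt_info.gen_dev_type = XEN_IGD_HSW;

    return HYPERVISOR_platform_op(&op);
}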
diff --git a/xen/include/public/vcpu.h b/xen/include/public/vcpu.h
index e888daf..5fa5066 100644
--- a/xen/include/public/vcpu.h
+++ b/xen/include/public/vcpu.h
@@ -227,6 +227,57 @@ struct vcpu_register_time_memory_area {
typedef struct vcpu_register_time_memory_area vcpu_register_time_memory_area_t;
DEFINE_XEN_GUEST_HANDLE(vcpu_register_time_memory_area_t);
+/* Request an I/O emulation for the specified VCPU. */
+#define VCPUOP_request_io_emulation 14
+
+#define PV_IOREQ_READ 1
+#define PV_IOREQ_WRITE 0
+
+#define PV_IOREQ_TYPE_PIO 0 /* pio */
+#define PV_IOREQ_TYPE_COPY 1 /* mmio ops */
+#define PV_IOREQ_TYPE_CTRL 2 /* vGT control ops */
+
+/* for "addr" field , when "type" is PV_IOREQ_TYPE_CTRL */
+#define VGT_CTRL_FORCEWAKE_GET 0
+#define VGT_CTRL_FORCEWAKE_PUT 1
+
+struct vcpu_emul_ioreq {
+ uint64_t addr; /* physical address */
+ uint64_t data; /* data (or paddr of data) */
+ uint64_t count; /* for rep prefixes */
+ uint32_t size; /* size in bytes */
+ uint16_t _pad0;
+ uint8_t state:4;
+ uint8_t data_is_ptr:1; /* if 1, data above is the guest paddr
+ * of the real data to use. */
+ uint8_t dir:1; /* 1=read, 0=write */
+ uint8_t df:1;
+ uint8_t _pad1:1;
+ uint8_t type; /* I/O type */
+};
+typedef struct vcpu_emul_ioreq vcpu_emul_ioreq_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_emul_ioreq_t);
+
+#define VCPUOP_get_sysdata 16
+/* sub operations */
+#define VCPUOP_sysdata_get_segment 0
+#define VCPUOP_sysdata_read 1
+struct vcpu_sysdata_request {
+ uint64_t op_type;
+ union {
+ struct {
+ uint32_t selector;
+ uint32_t pad1;
+ uint64_t xdt_desc[2];
+ /* System descriptors occupy two descriptor slots in IA32e mode */
+ } seg;
+ struct {
+ uint64_t src_addr; /* linear address */
+ uint64_t sys_data;
+ uint32_t bytes;
+ } rd;
+ } ops;
+};
#endif /* __XEN_PUBLIC_VCPU_H__ */
/*
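A minimal sketch of a PV guest using the new VCPU op for a vGT control request: taking the GPU forcewake before touching registers. HYPERVISOR_vcpu_op() is the standard guest-side wrapper; field choices beyond type/addr are illustrative.

static int vgt_forcewake_get(int vcpu)
{
    struct vcpu_emul_ioreq req = {
        .type  = PV_IOREQ_TYPE_CTRL,
        .addr  = VGT_CTRL_FORCEWAKE_GET, /* sub-op is carried in "addr" */
        .size  = 4,
        .count = 1,
    };

    return HYPERVISOR_vcpu_op(VCPUOP_request_io_emulation, vcpu, &req);
}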
diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
index fe179b9..4c6021a 100644
--- a/xen/include/public/xen.h
+++ b/xen/include/public/xen.h
@@ -789,6 +789,7 @@ struct xen_multiboot_mod_list
/* Unused, must be zero */
uint32_t pad;
};
+
/*
* `incontents 200 startofday_dom0_console Dom0_console
*
diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h
index 4ac39ad..3d80a9d 100644
--- a/xen/include/xen/event.h
+++ b/xen/include/xen/event.h
@@ -66,6 +66,9 @@ void free_xen_event_channel(
/* Query if event channel is in use by the guest */
int guest_enabled_event(struct vcpu *v, uint32_t virq);
+/* Notify remote end of a Xen-attached event channel - no event lock held. */
+void __notify_via_xen_event_channel(struct domain *ld, int lport);
+
/* Notify remote end of a Xen-attached event channel.*/
void notify_via_xen_event_channel(struct domain *ld, int lport);
diff --git a/xen/include/xen/iocap.h b/xen/include/xen/iocap.h
index b755ecb..16a88be 100644
--- a/xen/include/xen/iocap.h
+++ b/xen/include/xen/iocap.h
@@ -17,6 +17,13 @@
#define iomem_access_permitted(d, s, e) \
rangeset_contains_range((d)->iomem_caps, s, e)
+#define iomem_permit_forward(d, s, e) \
+ rangeset_add_range((d)->iomem_forward_caps, s, e)
+#define iomem_deny_forward(d, s, e) \
+ rangeset_remove_range((d)->iomem_forward_caps, s, e)
+#define iomem_forward_permitted(d, s, e) \
+ rangeset_contains_range((d)->iomem_forward_caps, s, e)
+
#define irq_permit_access(d, i) \
rangeset_add_singleton((d)->irq_caps, i)
#define irq_deny_access(d, i) \
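The forward variants mirror the existing iomem_caps accessors but track ranges that should be forwarded to the vGT driver rather than handled as plain pass-through. A minimal hypervisor-side sketch (not part of the patch), showing bookkeeping only:

static int vgt_add_forward_range(struct domain *d,
                                 unsigned long s, unsigned long e)
{
    int rc = iomem_permit_forward(d, s, e);

    if ( rc )
        return rc;

    /* A later MMIO fault handler can test membership cheaply: */
    ASSERT(iomem_forward_permitted(d, s, e));

    return 0;
}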
diff --git a/xen/include/xen/rangeset.h b/xen/include/xen/rangeset.h
index 1e16a6b..9d51aa4 100644
--- a/xen/include/xen/rangeset.h
+++ b/xen/include/xen/rangeset.h
@@ -73,4 +73,8 @@ void rangeset_printk(
void rangeset_domain_printk(
struct domain *d);
+void *first_range_ex(struct rangeset *r);
+void *get_range(struct rangeset *r, void *y,
+ unsigned long *s, unsigned long *e);
+
#endif /* __XEN_RANGESET_H__ */
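These two helpers expose a cursor-style walk over a rangeset. A minimal sketch, assuming first_range_ex() returns an opaque cursor (NULL for an empty set) and get_range() fills in that cursor's bounds and returns the next cursor; the exact contract is defined by the accompanying common/rangeset.c change, which this hunk does not show.

static void dump_forward_ranges(struct domain *d)
{
    unsigned long s, e;
    void *cur = first_range_ex(d->iomem_forward_caps);

    while ( cur )
    {
        cur = get_range(d->iomem_forward_caps, cur, &s, &e);
        printk("d%d forwards %lx-%lx\n", d->domain_id, s, e);
    }
}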
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index ae6a3b8..8f7b255 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -284,6 +284,7 @@ struct domain
/* I/O capabilities (access to IRQs and memory-mapped I/O). */
struct rangeset *iomem_caps;
struct rangeset *irq_caps;
+ struct rangeset *iomem_forward_caps;
/* Is this an HVM guest? */
bool_t is_hvm;
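The new rangeset would need to be allocated next to iomem_caps during domain construction and freed on destruction; a sketch of the domain_create() side, mirroring how iomem_caps itself is set up (the name string is illustrative):

d->iomem_forward_caps = rangeset_new(d, "I/O Memory (forward)",
                                     RANGESETF_prettyprint_hex);
if ( d->iomem_forward_caps == NULL )
    goto fail;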